update
This commit is contained in:
79
news_crawler/check_database.py
Normal file
79
news_crawler/check_database.py
Normal file
@@ -0,0 +1,79 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Quick script to list the RSS feeds stored in the database and summarize article counts
|
||||
"""
|
||||
from pymongo import MongoClient
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Put the sibling ``backend`` directory first on the import path so that
# modules living there can be imported by this script.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'backend'))

# Load settings from backend/.env when python-dotenv is available. The
# package is optional: without it the script falls back to the process
# environment. Only the import is guarded, so Ctrl-C and genuine errors
# while reading the .env file are no longer silently swallowed.
try:
    from dotenv import load_dotenv
except ImportError:
    pass
else:
    load_dotenv(dotenv_path=os.path.join(os.path.dirname(__file__), '..', 'backend', '.env'))
|
||||
|
||||
# MongoDB setup
# The connection string comes from the environment (optionally populated
# from backend/.env) and falls back to a local, unauthenticated server.
MONGODB_URI = os.getenv('MONGODB_URI', 'mongodb://localhost:27017/')
DB_NAME = 'munich_news'


def redact_uri(uri):
    """Return *uri* with any embedded password replaced by ``***``.

    Connection strings of the form ``scheme://user:password@host/...`` carry
    a secret that should not be echoed to the terminal or to logs. URIs
    without a ``user:password@`` part are returned unchanged.
    """
    scheme, sep, rest = uri.partition('://')
    if not sep:
        return uri
    # Credentials can only appear in the authority part, before the first '/'.
    authority, slash, tail = rest.partition('/')
    userinfo, at, hosts = authority.rpartition('@')
    if not at:
        return uri
    user, colon, _password = userinfo.partition(':')
    if not colon:
        return uri
    return f"{scheme}://{user}:***@{hosts}{slash}{tail}"


print(f"Connecting to: {redact_uri(MONGODB_URI)}")
print(f"Database: {DB_NAME}\n")
|
||||
|
||||
# Connect to MongoDB, list the configured RSS feeds, then summarize the
# stored articles. Any failure is reported and turned into exit status 1;
# the client is closed on every exit path, including sys.exit().
client = None
try:
    # The short server-selection timeout makes an unreachable server fail
    # after about five seconds.
    client = MongoClient(MONGODB_URI, serverSelectionTimeoutMS=5000)
    # Test connection
    client.server_info()
    print("✓ Connected to MongoDB\n")

    db = client[DB_NAME]
    rss_feeds_collection = db['rss_feeds']

    # Get all feeds
    feeds = list(rss_feeds_collection.find())

    if not feeds:
        print("❌ No RSS feeds found in database\n")
        print("Add feeds using the API:")
        print(" curl -X POST http://localhost:5001/api/rss-feeds \\")
        print(" -H 'Content-Type: application/json' \\")
        print(" -d '{\"name\": \"Feed Name\", \"url\": \"https://example.com/rss\"}'")
        # SystemExit is not an Exception subclass, so the handler below
        # does not intercept it.
        sys.exit(1)

    print(f"Found {len(feeds)} RSS feed(s):\n")
    print("="*80)

    for i, feed in enumerate(feeds, 1):
        print(f"\n{i}. {feed.get('name', 'Unknown')}")
        print(f" URL: {feed.get('url', 'N/A')}")
        print(f" Active: {feed.get('active', True)}")
        print(f" Created: {feed.get('created_at', 'N/A')}")
        print(f" ID: {feed.get('_id', 'N/A')}")

    print("\n" + "="*80)

    # Check articles
    articles_collection = db['articles']
    total_articles = articles_collection.count_documents({})
    # Counts documents that have the field at all, even if it is empty.
    crawled_articles = articles_collection.count_documents({'full_content': {'$exists': True}})

    print("\nArticles in database:")
    print(f" Total: {total_articles}")
    print(f" With full content: {crawled_articles}")
    print(f" Without full content: {total_articles - crawled_articles}")

    if total_articles > 0:
        print("\nSample article:")
        # find_one() returns None if the collection was emptied after the
        # count above; fall back to an empty document in that case.
        sample = articles_collection.find_one() or {}
        title = sample.get('title', 'N/A')
        if title is None:
            title = 'N/A'
        # str() guards against non-string titles, which cannot be sliced.
        print(f" Title: {str(title)[:60]}")
        print(f" Link: {sample.get('link', 'N/A')}")
        print(f" Has full_content: {bool(sample.get('full_content'))}")
        print(f" Word count: {sample.get('word_count', 'N/A')}")

    print("\n✓ Database check complete!")

except Exception as e:
    print(f"❌ Error: {e}")
    sys.exit(1)
finally:
    # Release the connection pool regardless of how the check ended.
    if client is not None:
        client.close()
|
||||
Reference in New Issue
Block a user