This commit is contained in:
2025-11-10 19:13:33 +01:00
commit ac5738c29d
64 changed files with 9445 additions and 0 deletions

View File

@@ -0,0 +1,79 @@
#!/usr/bin/env python
"""
Quick script to check what RSS feeds are in the database
"""
from pymongo import MongoClient
import os
import sys
# Put the sibling ``backend`` directory at the front of the import path so
# modules that live there can be imported from this script.
_backend_dir = os.path.join(os.path.dirname(__file__), '..', 'backend')
sys.path.insert(0, _backend_dir)

# Loading backend/.env is best-effort: python-dotenv is an optional
# dependency, so a missing package is silently ignored and the script falls
# back to the process environment.  Only the failures that are expected here
# are handled -- a bare ``except`` would also hide KeyboardInterrupt,
# SystemExit and genuine bugs.
_env_file = os.path.join(_backend_dir, '.env')
try:
    from dotenv import load_dotenv
    load_dotenv(dotenv_path=_env_file)
except ImportError:
    # python-dotenv is not installed; continue with the existing environment.
    pass
except OSError as exc:
    # The .env file exists but could not be read; warn and carry on.
    print(f"Warning: could not read {_env_file}: {exc}", file=sys.stderr)
# MongoDB connection settings.  The database name is fixed; the URI comes
# from the MONGODB_URI environment variable and defaults to a local server.
# NOTE(review): the URI is echoed verbatim below, including any credentials
# embedded in it -- confirm that is acceptable wherever this script is run.
DB_NAME = 'munich_news'
MONGODB_URI = os.environ.get('MONGODB_URI', 'mongodb://localhost:27017/')
print(f"Connecting to: {MONGODB_URI}", f"Database: {DB_NAME}\n", sep="\n")
# Connect to MongoDB, list every stored RSS feed, then summarise the
# ``articles`` collection.  The script exits with status 1 when no feeds are
# stored or when any step raises.
try:
    # Using the client as a context manager closes its connections on every
    # exit path, including the sys.exit() below (SystemExit unwinds through
    # the ``with`` block and is not caught by ``except Exception``).
    with MongoClient(MONGODB_URI, serverSelectionTimeoutMS=5000) as client:
        # Test connection: force a round trip so connection problems surface
        # here rather than at the first query.
        client.server_info()
        print("✓ Connected to MongoDB\n")

        db = client[DB_NAME]
        rss_feeds_collection = db['rss_feeds']

        # Get all feeds
        feeds = list(rss_feeds_collection.find())
        if not feeds:
            print("❌ No RSS feeds found in database\n")
            print("Add feeds using the API:")
            print(" curl -X POST http://localhost:5001/api/rss-feeds \\")
            print(" -H 'Content-Type: application/json' \\")
            print(" -d '{\"name\": \"Feed Name\", \"url\": \"https://example.com/rss\"}'")
            sys.exit(1)

        print(f"Found {len(feeds)} RSS feed(s):\n")
        print("="*80)
        for i, feed in enumerate(feeds, 1):
            print(f"\n{i}. {feed.get('name', 'Unknown')}")
            print(f" URL: {feed.get('url', 'N/A')}")
            print(f" Active: {feed.get('active', True)}")
            print(f" Created: {feed.get('created_at', 'N/A')}")
            print(f" ID: {feed.get('_id', 'N/A')}")
        print("\n" + "="*80)

        # Check articles: an article counts as crawled when it carries a
        # ``full_content`` field at all, whatever its value.
        articles_collection = db['articles']
        total_articles = articles_collection.count_documents({})
        crawled_articles = articles_collection.count_documents({'full_content': {'$exists': True}})
        print("\nArticles in database:")
        print(f" Total: {total_articles}")
        print(f" With full content: {crawled_articles}")
        print(f" Without full content: {total_articles - crawled_articles}")

        if total_articles > 0:
            sample = articles_collection.find_one()
            # find_one() returns None if the documents were removed after the
            # count above; skip the sample instead of failing on None.
            if sample is not None:
                print("\nSample article:")
                # The stored title may be missing, None, or not a string;
                # normalise it before slicing so the preview cannot raise.
                title = sample.get('title') or 'N/A'
                print(f" Title: {str(title)[:60]}")
                print(f" Link: {sample.get('link', 'N/A')}")
                print(f" Has full_content: {bool(sample.get('full_content'))}")
                print(f" Word count: {sample.get('word_count', 'N/A')}")

        print("\n✓ Database check complete!")
except Exception as e:
    # Top-level boundary of the script: report the failure and signal it
    # through the exit status.
    print(f"❌ Error: {e}")
    sys.exit(1)