#!/usr/bin/env python
"""
Quick script to check what RSS feeds are in the database.

Connects to the MongoDB instance named by the ``MONGODB_URI`` environment
variable (default ``mongodb://localhost:27017/``), lists every document in
the ``rss_feeds`` collection of the ``munich_news`` database, and prints
summary counts for the ``articles`` collection.

If python-dotenv is installed, ``../backend/.env`` (relative to this file)
is loaded first so ``MONGODB_URI`` can be defined there.

Exit status: 0 on success, 1 when no feeds are stored or on any error.
"""
import os
import sys

from pymongo import MongoClient

# Add parent directory to path to import from backend
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'backend'))

try:
    from dotenv import load_dotenv
except ImportError:
    # python-dotenv is optional; without it only the process environment
    # is consulted.
    pass
else:
    try:
        load_dotenv(dotenv_path=os.path.join(os.path.dirname(__file__), '..', 'backend', '.env'))
    except OSError as env_error:
        # Loading .env is best-effort: report the problem but keep going.
        print(f"Warning: could not load .env file: {env_error}", file=sys.stderr)

# MongoDB setup
MONGODB_URI = os.getenv('MONGODB_URI', 'mongodb://localhost:27017/')
DB_NAME = 'munich_news'


def _report_feeds(rss_feeds_collection) -> bool:
    """Print every RSS feed document; return False when none are stored."""
    feeds = list(rss_feeds_collection.find())

    if not feeds:
        print("❌ No RSS feeds found in database\n")
        print("Add feeds using the API:")
        print(" curl -X POST http://localhost:5001/api/rss-feeds \\")
        print(" -H 'Content-Type: application/json' \\")
        print(" -d '{\"name\": \"Feed Name\", \"url\": \"https://example.com/rss\"}'")
        return False

    print(f"Found {len(feeds)} RSS feed(s):\n")
    print("=" * 80)

    for i, feed in enumerate(feeds, 1):
        print(f"\n{i}. {feed.get('name', 'Unknown')}")
        print(f" URL: {feed.get('url', 'N/A')}")
        # A feed document without an 'active' field is reported as active.
        print(f" Active: {feed.get('active', True)}")
        print(f" Created: {feed.get('created_at', 'N/A')}")
        print(f" ID: {feed.get('_id', 'N/A')}")

    print("\n" + "=" * 80)
    return True


def _report_articles(articles_collection) -> None:
    """Print article counts and, when any exist, one sample article."""
    total_articles = articles_collection.count_documents({})
    # Counts documents where the 'full_content' field exists at all,
    # regardless of its value.
    crawled_articles = articles_collection.count_documents({'full_content': {'$exists': True}})

    print("\nArticles in database:")
    print(f" Total: {total_articles}")
    print(f" With full content: {crawled_articles}")
    print(f" Without full content: {total_articles - crawled_articles}")

    if total_articles <= 0:
        return

    sample = articles_collection.find_one()
    if sample is None:
        # The collection was emptied between the count and this lookup.
        return

    # A stored title may be None or a non-string value; normalise it before
    # slicing so one odd document does not abort the whole report.
    title = sample.get('title')
    title = 'N/A' if title is None else str(title)

    print("\nSample article:")
    print(f" Title: {title[:60]}")
    print(f" Link: {sample.get('link', 'N/A')}")
    print(f" Has full_content: {bool(sample.get('full_content'))}")
    print(f" Word count: {sample.get('word_count', 'N/A')}")


def main() -> int:
    """Run the database check and return the process exit status.

    Returns:
        0 when the report completed, 1 when no RSS feeds are stored or when
        connecting to / querying MongoDB raised an exception.
    """
    # NOTE(review): the URI is echoed verbatim; if it embeds credentials
    # they will appear in the output.
    print(f"Connecting to: {MONGODB_URI}")
    print(f"Database: {DB_NAME}\n")

    try:
        # The context manager closes the client on every exit path.
        with MongoClient(MONGODB_URI, serverSelectionTimeoutMS=5000) as client:
            # Test connection: MongoClient connects lazily, so force a
            # round trip now to fail fast within the 5 s selection timeout.
            client.server_info()
            print("✓ Connected to MongoDB\n")

            db = client[DB_NAME]

            if not _report_feeds(db['rss_feeds']):
                return 1

            _report_articles(db['articles'])

            print("\n✓ Database check complete!")
    except Exception as e:
        # Top-level boundary of a diagnostic script: report and fail.
        print(f"❌ Error: {e}")
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main())