80 lines
2.6 KiB
Python
80 lines
2.6 KiB
Python
#!/usr/bin/env python
"""
Quick script to check what RSS feeds are in the database.

Connects to MongoDB (URI taken from the ``MONGODB_URI`` environment variable,
optionally loaded from ``../backend/.env``), lists every document in the
``rss_feeds`` collection and prints a short summary of the ``articles``
collection. Exits with status 1 when no feeds are found or when any error
occurs, 0 otherwise.
"""
import os
import sys

# Add parent directory to path to import from backend
_BACKEND_DIR = os.path.join(os.path.dirname(__file__), '..', 'backend')
sys.path.insert(0, _BACKEND_DIR)

# python-dotenv is optional: without it we simply rely on the real environment.
try:
    from dotenv import load_dotenv
except ImportError:
    load_dotenv = None

if load_dotenv is not None:
    try:
        load_dotenv(dotenv_path=os.path.join(_BACKEND_DIR, '.env'))
    except Exception as exc:
        # Loading .env stays best-effort, but the failure is no longer silent.
        print(f"⚠ Could not load .env file: {exc}", file=sys.stderr)

# MongoDB setup
MONGODB_URI = os.getenv('MONGODB_URI', 'mongodb://localhost:27017/')
DB_NAME = 'munich_news'


def redact_uri(uri: str) -> str:
    """Return *uri* with the password in its ``user:password@`` part masked.

    URIs without a scheme separator, without credentials, or with a user name
    but no password are returned unchanged.
    """
    scheme, sep, rest = uri.partition('://')
    if not sep:
        return uri
    # Only the authority (text before the first '/') can carry credentials.
    authority, slash, tail = rest.partition('/')
    userinfo, at, hosts = authority.rpartition('@')
    if not at:
        return uri
    user, colon, _password = userinfo.partition(':')
    if not colon:
        return uri
    return f"{scheme}://{user}:***@{hosts}{slash}{tail}"


def describe_feed(index: int, feed: dict) -> list:
    """Return the output lines describing one ``rss_feeds`` document."""
    return [
        f"\n{index}. {feed.get('name', 'Unknown')}",
        f" URL: {feed.get('url', 'N/A')}",
        f" Active: {feed.get('active', True)}",
        f" Created: {feed.get('created_at', 'N/A')}",
        f" ID: {feed.get('_id', 'N/A')}",
    ]


def describe_sample_article(sample: dict) -> list:
    """Return the output lines describing one ``articles`` document.

    The title is coerced to ``str`` before truncation so that a ``None`` or
    non-string value in the document cannot raise ``TypeError``.
    """
    title = sample.get('title', 'N/A')
    if title is None:
        title = 'N/A'
    return [
        f" Title: {str(title)[:60]}",
        f" Link: {sample.get('link', 'N/A')}",
        f" Has full_content: {bool(sample.get('full_content'))}",
        f" Word count: {sample.get('word_count', 'N/A')}",
    ]


def main() -> int:
    """Run the database check and return the process exit status."""
    # Imported here so the pure helpers above can be imported without pymongo.
    from pymongo import MongoClient

    # Never echo the password that may be embedded in the connection string.
    print(f"Connecting to: {redact_uri(MONGODB_URI)}")
    print(f"Database: {DB_NAME}\n")

    client = None
    try:
        client = MongoClient(MONGODB_URI, serverSelectionTimeoutMS=5000)
        # Test connection
        client.server_info()
        print("✓ Connected to MongoDB\n")

        db = client[DB_NAME]
        rss_feeds_collection = db['rss_feeds']

        # Get all feeds
        feeds = list(rss_feeds_collection.find())

        if not feeds:
            print("❌ No RSS feeds found in database\n")
            print("Add feeds using the API:")
            print(" curl -X POST http://localhost:5001/api/rss-feeds \\")
            print(" -H 'Content-Type: application/json' \\")
            print(" -d '{\"name\": \"Feed Name\", \"url\": \"https://example.com/rss\"}'")
            return 1

        print(f"Found {len(feeds)} RSS feed(s):\n")
        print("=" * 80)

        for i, feed in enumerate(feeds, 1):
            for line in describe_feed(i, feed):
                print(line)

        print("\n" + "=" * 80)

        # Check articles
        articles_collection = db['articles']
        total_articles = articles_collection.count_documents({})
        crawled_articles = articles_collection.count_documents(
            {'full_content': {'$exists': True}}
        )

        print("\nArticles in database:")
        print(f" Total: {total_articles}")
        print(f" With full content: {crawled_articles}")
        print(f" Without full content: {total_articles - crawled_articles}")

        if total_articles > 0:
            sample = articles_collection.find_one()
            # The collection may have been emptied since the count was taken.
            if sample is not None:
                print("\nSample article:")
                for line in describe_sample_article(sample):
                    print(line)

        print("\n✓ Database check complete!")
        return 0

    except Exception as e:
        # Top-level boundary of a diagnostic script: report and fail.
        print(f"❌ Error: {e}")
        return 1

    finally:
        # Release the connection pool on every exit path.
        if client is not None:
            client.close()


if __name__ == "__main__":
    sys.exit(main())