diff --git a/debug_categories.py b/debug_categories.py
new file mode 100644
index 0000000..1382035
--- /dev/null
+++ b/debug_categories.py
@@ -0,0 +1,35 @@
+
+from pymongo import MongoClient
+from datetime import datetime, timedelta, timezone
+import os
+
+# Connect to MongoDB
+mongo_uri = os.getenv('MONGODB_URI', 'mongodb://mongodb:27017/')
+client = MongoClient(mongo_uri)
+db = client['munich_news']
+articles = db['articles']
+subscribers = db['subscribers']
+
+print("--- Distinct Categories in Articles Collection ---")
+categories = articles.distinct('category')
+print(categories)
+
+print("\n--- Recent Article Counts by Category (Last 24h) ---")
+yesterday = datetime.now(timezone.utc) - timedelta(hours=24)
+recent_articles = articles.find({'created_at': {'$gte': yesterday}})
+category_counts = {}
+for art in recent_articles:
+    cat = art.get('category', 'unknown')
+    category_counts[cat] = category_counts.get(cat, 0) + 1
+
+for cat, count in category_counts.items():
+    print(f"{cat}: {count}")
+
+print("\n--- Subscriber Preferences ---")
+for sub in subscribers.find():
+    print(f"Email: {sub.get('email')}, Categories: {sub.get('categories')}")
+
+print("\n--- RSS Feeds ---")
+rss_feeds = db['rss_feeds']
+for feed in rss_feeds.find():
+    print(f"Name: {feed.get('name')}, URL: {feed.get('url')}, Category: {feed.get('category')}, Active: {feed.get('active')}")
diff --git a/news_crawler/crawler_service.py b/news_crawler/crawler_service.py
index 9f60221..1540a71 100644
--- a/news_crawler/crawler_service.py
+++ b/news_crawler/crawler_service.py
@@ -462,7 +462,11 @@ def crawl_rss_feed(feed_url, feed_name, feed_category='general', max_articles=10
 
     except Exception as e:
         print(f"  ✗ Error processing feed {feed_name}: {e}")
-        return 0
+        return {
+            'crawled': 0,
+            'summarized': 0,
+            'failed_summaries': 0
+        }
 
 
 def crawl_all_feeds(max_articles_per_feed=10):