#!/usr/bin/env python
"""
Check what categories exist in RSS feeds and articles

Connects to the MongoDB database named by ``Config`` and prints three
sections: the distinct ``category`` values in ``rss_feeds`` (with the feeds
under each), the distinct ``category`` values in ``articles`` (with total and
summarised counts), and a small sample of active subscribers' ``categories``.
"""
from pymongo import MongoClient

from config import Config

# Output glyphs are written as named escapes so the source stays pure ASCII and
# cannot be garbled by a tool that mis-detects the file's encoding.
ICON_REPORT = "\N{BAR CHART}"
ICON_SECTION = "\N{SMALL BLUE DIAMOND}"
ICON_DONE = "\N{WHITE HEAVY CHECK MARK}"

RULE = "=" * 70

# Number of active subscribers shown in the preferences sample.
SAMPLE_SIZE = 5


def _report_feed_categories(db) -> None:
    """Print the distinct RSS feed categories and the feed names under each.

    Args:
        db: Database handle; the ``rss_feeds`` collection is read from it.
    """
    print(f"\n{ICON_SECTION} RSS Feed Categories:")
    feeds_collection = db['rss_feeds']
    categories = feeds_collection.distinct('category')
    print(f" Unique categories: {categories}")
    print(f" Total: {len(categories)} categories")

    print("\n Feeds per category:")
    for cat in categories:
        feeds = list(feeds_collection.find({'category': cat}, {'name': 1, '_id': 0}))
        # The list already holds every feed matching the filter, so its length
        # is the count; a separate count query would repeat the same lookup.
        print(f" {cat}: {len(feeds)} feeds")
        for feed in feeds:
            # A feed document without a name must not abort the whole report.
            print(f" - {feed.get('name', '<unnamed>')}")


def _report_article_categories(db) -> None:
    """Print the distinct article categories with total and summarised counts.

    Args:
        db: Database handle; the ``articles`` collection is read from it.
    """
    print(f"\n{ICON_SECTION} Article Categories:")
    articles_collection = db['articles']
    categories = articles_collection.distinct('category')
    print(f" Unique categories: {categories}")
    print(f" Total: {len(categories)} categories")

    print("\n Articles per category:")
    for cat in categories:
        count = articles_collection.count_documents({'category': cat})
        with_summary = articles_collection.count_documents(
            {'category': cat, 'summary': {'$exists': True, '$ne': None}}
        )
        print(f" {cat}: {count} articles ({with_summary} with summaries)")


def _report_subscriber_categories(db) -> None:
    """Print the active-subscriber total and a sample of their categories.

    Args:
        db: Database handle; the ``subscribers`` collection is read from it.
    """
    print(f"\n{ICON_SECTION} Subscriber Categories:")
    subscribers_collection = db['subscribers']
    active_filter = {'status': 'active'}
    total_subs = subscribers_collection.count_documents(active_filter)
    print(f" Total active subscribers: {total_subs}")

    sample_subs = subscribers_collection.find(
        active_filter, {'email': 1, 'categories': 1, '_id': 0}
    ).limit(SAMPLE_SIZE)
    print("\n Sample subscriber preferences:")
    for sub in sample_subs:
        cats = sub.get('categories', 'None')
        # Tolerate a subscriber document that lacks an email field.
        print(f" {sub.get('email', '<no email>')}: {cats}")


def main() -> None:
    """Run the full category report against the configured database."""
    client = MongoClient(Config.MONGODB_URI)
    try:
        db = client[Config.DB_NAME]

        print("\n" + RULE)
        print(f"{ICON_REPORT} Category Analysis")
        print(RULE)

        _report_feed_categories(db)
        _report_article_categories(db)
        _report_subscriber_categories(db)

        print("\n" + RULE)
        print(f"{ICON_DONE} Analysis Complete")
        print(RULE + "\n")
    finally:
        # Release the connection pool even if a query fails part-way through.
        client.close()


if __name__ == "__main__":
    main()