From 1c0926a7f05da991d2918b5c1f0cd0f4061edc58 Mon Sep 17 00:00:00 2001
From: Dongho Kim
Date: Thu, 20 Nov 2025 19:15:41 +0100
Subject: [PATCH] update

---
 backend/add_categories_to_feeds.py | 87 ++++++++++++++++++++++++++++++
 backend/check_categories.py        | 66 +++++++++++++++++++++++
 2 files changed, 153 insertions(+)
 create mode 100644 backend/add_categories_to_feeds.py
 create mode 100644 backend/check_categories.py

diff --git a/backend/add_categories_to_feeds.py b/backend/add_categories_to_feeds.py
new file mode 100644
index 0000000..a094df2
--- /dev/null
+++ b/backend/add_categories_to_feeds.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python
+"""
+Script to add categories to existing RSS feeds.
+
+Every feed in the `rss_feeds` collection whose `category` is missing or empty
+is assigned one by matching keywords against its name and URL; feeds without
+a matching keyword get 'general'. Feeds that already have one are skipped.
+"""
+from pymongo import MongoClient
+from config import Config
+
+client = MongoClient(Config.MONGODB_URI)
+db = client[Config.DB_NAME]
+rss_feeds_collection = db['rss_feeds']
+
+print("\n" + "="*70)
+print("📡 Adding Categories to RSS Feeds")
+print("="*70)
+
+# Get all feeds
+all_feeds = list(rss_feeds_collection.find())
+print(f"\nFound {len(all_feeds)} RSS feeds")
+
+# Category mapping based on feed names/URLs.
+# Keys are lowercase substrings; the first key that matches wins, so the
+# order of this dict decides ties (e.g. 'münchen' beats 'sport').
+category_mapping = {
+    'münchen': 'local',
+    'munich': 'local',
+    'lokales': 'local',
+    'sport': 'sports',
+    'fussball': 'sports',
+    'fc bayern': 'sports',
+    'wissenschaft': 'science',
+    'science': 'science',
+    'tech': 'science',
+    'technologie': 'science',
+}
+
+updated = 0
+for feed in all_feeds:
+    # `or ''` also covers fields that are present but stored as null.
+    name = (feed.get('name') or '').lower()
+    url = (feed.get('url') or '').lower()
+    current_category = feed.get('category')
+    # Label for the log lines; falls back to the id so that a feed without
+    # a name cannot raise KeyError.
+    label = feed.get('name') or feed['_id']
+
+    # Skip if already has a category
+    if current_category:
+        print(f" ✓ {label}: Already has category '{current_category}'")
+        continue
+
+    # Try to determine category from name or URL
+    detected_category = 'general'  # Default
+
+    for keyword, category in category_mapping.items():
+        if keyword in name or keyword in url:
+            detected_category = category
+            break
+
+    # Update the feed
+    rss_feeds_collection.update_one(
+        {'_id': feed['_id']},
+        {'$set': {'category': detected_category}}
+    )
+
+    print(f" ✓ {label}: Set category to '{detected_category}'")
+    updated += 1
+
+print("\n" + "="*70)
+print("📊 Summary")
+print("="*70)
+print(f"✓ Updated: {updated} feeds")
+print(f"✓ Already had categories: {len(all_feeds) - updated} feeds")
+print("="*70 + "\n")
+
+# Show final category distribution
+print("Category distribution:")
+categories = rss_feeds_collection.aggregate([
+    {'$group': {'_id': '$category', 'count': {'$sum': 1}}}
+])
+for cat in categories:
+    print(f" {cat['_id']}: {cat['count']} feeds")
+
+print("\n✅ Done! Now run the migration script to update subscriber categories.")
diff --git a/backend/check_categories.py b/backend/check_categories.py
new file mode 100644
index 0000000..924171b
--- /dev/null
+++ b/backend/check_categories.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python
+"""
+Check what categories exist in RSS feeds and articles.
+
+Read-only report: prints the distinct `category` values of the `rss_feeds`
+and `articles` collections with per-category counts, plus the category
+preferences of up to five active subscribers.
+"""
+from pymongo import MongoClient
+from config import Config
+
+client = MongoClient(Config.MONGODB_URI)
+db = client[Config.DB_NAME]
+
+print("\n" + "="*70)
+print("📊 Category Analysis")
+print("="*70)
+
+# Check RSS feed categories
+print("\n🔹 RSS Feed Categories:")
+rss_feeds_collection = db['rss_feeds']
+feed_categories = rss_feeds_collection.distinct('category')
+print(f" Unique categories: {feed_categories}")
+print(f" Total: {len(feed_categories)} categories")
+
+# Count feeds per category
+print("\n Feeds per category:")
+for cat in feed_categories:
+    # One query serves both the count and the listing.
+    feeds = list(rss_feeds_collection.find({'category': cat}, {'name': 1, '_id': 0}))
+    print(f" {cat}: {len(feeds)} feeds")
+    for feed in feeds:
+        # A feed without a name comes back without that key, so use .get().
+        print(f" - {feed.get('name', '(unnamed)')}")
+
+# Check article categories
+print("\n🔹 Article Categories:")
+articles_collection = db['articles']
+article_categories = articles_collection.distinct('category')
+print(f" Unique categories: {article_categories}")
+print(f" Total: {len(article_categories)} categories")
+
+# Count articles per category
+print("\n Articles per category:")
+for cat in article_categories:
+    count = articles_collection.count_documents({'category': cat})
+    # "With summary" means the field exists and is not null.
+    with_summary = articles_collection.count_documents({'category': cat, 'summary': {'$exists': True, '$ne': None}})
+    print(f" {cat}: {count} articles ({with_summary} with summaries)")
+
+# Check subscriber categories
+print("\n🔹 Subscriber Categories:")
+subscribers_collection = db['subscribers']
+total_subs = subscribers_collection.count_documents({'status': 'active'})
+print(f" Total active subscribers: {total_subs}")
+
+# Sample a few subscribers to see their categories
+sample_subs = list(subscribers_collection.find({'status': 'active'}, {'email': 1, 'categories': 1, '_id': 0}).limit(5))
+print("\n Sample subscriber preferences:")
+for sub in sample_subs:
+    cats = sub.get('categories', 'None')
+    print(f" {sub.get('email', '(no email)')}: {cats}")
+
+print("\n" + "="*70)
+print("✅ Analysis Complete")
+print("="*70 + "\n")