update
This commit is contained in:
77
backend/add_categories_to_feeds.py
Normal file
77
backend/add_categories_to_feeds.py
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
#!/usr/bin/env python
"""
Script to add categories to existing RSS feeds.

Every document in the ``rss_feeds`` collection that has no (or an empty)
``category`` gets one assigned by keyword-matching its name and URL against
``CATEGORY_MAPPING``; feeds that match nothing fall back to
``DEFAULT_CATEGORY``. Feeds that already have a category are left untouched.
"""

# Horizontal rule used to frame the section headers in the console output.
RULE = "=" * 70

# Category assigned when no keyword matches.
DEFAULT_CATEGORY = 'general'

# Keyword -> category. Keywords are lowercase substrings; insertion order is
# the match priority (the first keyword found wins).
CATEGORY_MAPPING = {
    'münchen': 'local',
    'munich': 'local',
    'lokales': 'local',
    'sport': 'sports',
    'fussball': 'sports',
    'fc bayern': 'sports',
    'wissenschaft': 'science',
    'science': 'science',
    'tech': 'science',
    'technologie': 'science',
}


def detect_category(name, url, mapping=None, default=DEFAULT_CATEGORY):
    """Return the category for a feed based on its name and URL.

    Args:
        name: Feed name; ``None`` or missing values are treated as empty.
        url: Feed URL; ``None`` or missing values are treated as empty.
        mapping: Keyword -> category dict to use instead of
            ``CATEGORY_MAPPING``. Keywords must be lowercase.
        default: Category returned when no keyword matches.

    Returns:
        The category of the first keyword (in mapping order) that occurs as a
        case-insensitive substring of the name or the URL, else ``default``.
    """
    if mapping is None:
        mapping = CATEGORY_MAPPING
    # `or ''` guards against fields stored as None; str() against odd types.
    haystacks = (str(name or '').lower(), str(url or '').lower())
    for keyword, category in mapping.items():
        if any(keyword in text for text in haystacks):
            return category
    return default


def feed_label(feed):
    """Return a printable label for a feed document.

    Uses the feed's name, falling back to its URL and finally to a
    placeholder, so a document without a ``name`` cannot abort the run.
    """
    return feed.get('name') or feed.get('url') or '<unnamed feed>'


def main():
    """Categorize all uncategorized RSS feeds and print a summary."""
    # Imported here so the pure helpers above can be imported (and tested)
    # without a MongoDB driver or the project configuration being available.
    from pymongo import MongoClient
    from config import Config

    client = MongoClient(Config.MONGODB_URI)
    try:
        rss_feeds_collection = client[Config.DB_NAME]['rss_feeds']

        print("\n" + RULE)
        print("📡 Adding Categories to RSS Feeds")
        print(RULE)

        # Get all feeds
        all_feeds = list(rss_feeds_collection.find())
        print(f"\nFound {len(all_feeds)} RSS feeds")

        updated = 0
        for feed in all_feeds:
            label = feed_label(feed)
            current_category = feed.get('category')

            # Skip if already has a category
            if current_category:
                print(f" ✓ {label}: Already has category '{current_category}'")
                continue

            # Try to determine category from name or URL
            detected_category = detect_category(feed.get('name'), feed.get('url'))

            # Update the feed
            rss_feeds_collection.update_one(
                {'_id': feed['_id']},
                {'$set': {'category': detected_category}},
            )
            print(f" ✓ {label}: Set category to '{detected_category}'")
            updated += 1

        print("\n" + RULE)
        print("📊 Summary")
        print(RULE)
        print(f"✓ Updated: {updated} feeds")
        print(f"✓ Already had categories: {len(all_feeds) - updated} feeds")
        print(RULE + "\n")

        # Show final category distribution
        print("Category distribution:")
        categories = rss_feeds_collection.aggregate([
            {'$group': {'_id': '$category', 'count': {'$sum': 1}}}
        ])
        for cat in categories:
            print(f" {cat['_id']}: {cat['count']} feeds")

        print("\n✅ Done! Now run the migration script to update subscriber categories.")
    finally:
        # Release the connection pool even if a query above fails.
        client.close()


if __name__ == "__main__":
    main()
|
||||||
60
backend/check_categories.py
Normal file
60
backend/check_categories.py
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
#!/usr/bin/env python
"""
Check what categories exist in RSS feeds and articles.

Read-only diagnostic: prints the distinct ``category`` values of the
``rss_feeds`` and ``articles`` collections with per-category counts, plus the
category preferences of a small sample of active subscribers.
"""

# Horizontal rule used to frame the section headers in the console output.
RULE = "=" * 70

# Number of active subscribers shown in the preferences sample.
SAMPLE_SIZE = 5


def feed_label(feed):
    """Return the feed's name, or a placeholder when it has none."""
    return feed.get('name') or '<unnamed feed>'


def describe_subscriber(sub):
    """Return the output line for one subscriber document.

    A missing ``email`` is shown as a placeholder instead of raising, and a
    missing ``categories`` field is shown as the text ``None``.
    """
    email = sub.get('email') or '<no email>'
    cats = sub.get('categories', 'None')
    return f" {email}: {cats}"


def report_feeds(db):
    """Print the distinct feed categories and the feeds in each of them."""
    print("\n🔹 RSS Feed Categories:")
    rss_feeds_collection = db['rss_feeds']
    feed_categories = rss_feeds_collection.distinct('category')
    print(f" Unique categories: {feed_categories}")
    print(f" Total: {len(feed_categories)} categories")

    # Count feeds per category
    print("\n Feeds per category:")
    for cat in feed_categories:
        feeds = list(rss_feeds_collection.find({'category': cat}, {'name': 1, '_id': 0}))
        # The documents are fetched anyway, so their number doubles as the
        # count and saves a separate count query per category.
        print(f" {cat}: {len(feeds)} feeds")
        for feed in feeds:
            print(f" - {feed_label(feed)}")


def report_articles(db):
    """Print the distinct article categories with total and summarized counts."""
    print("\n🔹 Article Categories:")
    articles_collection = db['articles']
    article_categories = articles_collection.distinct('category')
    print(f" Unique categories: {article_categories}")
    print(f" Total: {len(article_categories)} categories")

    # Count articles per category
    print("\n Articles per category:")
    for cat in article_categories:
        count = articles_collection.count_documents({'category': cat})
        # An article counts as summarized when the field exists and is not null.
        with_summary = articles_collection.count_documents(
            {'category': cat, 'summary': {'$exists': True, '$ne': None}}
        )
        print(f" {cat}: {count} articles ({with_summary} with summaries)")


def report_subscribers(db):
    """Print the active-subscriber total and a sample of their preferences."""
    print("\n🔹 Subscriber Categories:")
    subscribers_collection = db['subscribers']
    total_subs = subscribers_collection.count_documents({'status': 'active'})
    print(f" Total active subscribers: {total_subs}")

    # Sample a few subscribers to see their categories
    sample_subs = subscribers_collection.find(
        {'status': 'active'},
        {'email': 1, 'categories': 1, '_id': 0},
    ).limit(SAMPLE_SIZE)
    print("\n Sample subscriber preferences:")
    for sub in sample_subs:
        print(describe_subscriber(sub))


def main():
    """Run the full category analysis against the configured database."""
    # Imported here so the pure helpers above can be imported (and tested)
    # without a MongoDB driver or the project configuration being available.
    from pymongo import MongoClient
    from config import Config

    client = MongoClient(Config.MONGODB_URI)
    try:
        db = client[Config.DB_NAME]

        print("\n" + RULE)
        print("📊 Category Analysis")
        print(RULE)

        report_feeds(db)
        report_articles(db)
        report_subscribers(db)

        print("\n" + RULE)
        print("✅ Analysis Complete")
        print(RULE + "\n")
    finally:
        # Release the connection pool even if a query above fails.
        client.close()


if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user