""" Script to fix duplicate RSS feeds and create unique index Run this once: python fix_duplicates.py """ from pymongo import MongoClient from config import Config # Connect to MongoDB client = MongoClient(Config.MONGODB_URI) db = client[Config.DB_NAME] rss_feeds_collection = db['rss_feeds'] print("Fixing duplicate RSS feeds...") # Get all feeds all_feeds = list(rss_feeds_collection.find()) print(f"Total feeds found: {len(all_feeds)}") # Find duplicates by URL seen_urls = {} duplicates_to_remove = [] for feed in all_feeds: url = feed.get('url') if url in seen_urls: # This is a duplicate, mark for removal duplicates_to_remove.append(feed['_id']) print(f" Duplicate found: {feed['name']} - {url}") else: # First occurrence, keep it seen_urls[url] = feed['_id'] # Remove duplicates if duplicates_to_remove: result = rss_feeds_collection.delete_many({'_id': {'$in': duplicates_to_remove}}) print(f"Removed {result.deleted_count} duplicate feeds") else: print("No duplicates found") # Drop existing indexes (if any) print("\nDropping existing indexes...") try: rss_feeds_collection.drop_indexes() print("Indexes dropped") except Exception as e: print(f"Note: {e}") # Create unique index on URL print("\nCreating unique index on 'url' field...") rss_feeds_collection.create_index('url', unique=True) print("āœ“ Unique index created successfully") # Verify remaining_feeds = list(rss_feeds_collection.find()) print(f"\nFinal feed count: {len(remaining_feeds)}") print("\nRemaining feeds:") for feed in remaining_feeds: print(f" - {feed['name']}: {feed['url']}") print("\nāœ“ Done! Duplicates removed and unique index created.") print("You can now restart your Flask app.")