This commit is contained in:
2025-11-10 19:13:33 +01:00
commit ac5738c29d
64 changed files with 9445 additions and 0 deletions

61
backend/fix_duplicates.py Normal file
View File

@@ -0,0 +1,61 @@
"""
Script to fix duplicate RSS feeds and create unique index
Run this once: python fix_duplicates.py
"""
from pymongo import MongoClient
from config import Config
# Key specification of the single-field ascending index on 'url', in the
# list-of-pairs form that Collection.index_information() reports.
URL_INDEX_KEY = [("url", 1)]


def find_duplicate_feeds(feeds):
    """Return the feeds whose 'url' already appeared earlier in *feeds*.

    The first document seen for each URL is kept; every later document with
    the same URL is returned, in input order. Documents without a 'url' key
    are grouped under ``None``, so only the first of them is kept. That is
    what a unique index on 'url' requires, because MongoDB indexes a missing
    field as null.
    """
    seen_urls = set()
    duplicates = []
    for feed in feeds:
        url = feed.get("url")
        if url in seen_urls:
            duplicates.append(feed)
        else:
            seen_urls.add(url)
    return duplicates


def drop_conflicting_url_indexes(collection):
    """Drop non-unique indexes on exactly 'url' and return their names.

    Only an existing non-unique index with the same key would make creating
    the unique index fail, so unrelated indexes on the collection are left
    alone instead of being wiped with drop_indexes().
    """
    dropped = []
    for name, info in collection.index_information().items():
        key = [tuple(pair) for pair in info.get("key", [])]
        if key == URL_INDEX_KEY and not info.get("unique"):
            collection.drop_index(name)
            dropped.append(name)
    return dropped


def main():
    """Remove duplicate RSS feeds, then enforce uniqueness of 'url'."""
    # Local import: pymongo is already a dependency of this file, and the
    # exception type is only needed here.
    from pymongo.errors import PyMongoError

    # The context manager closes the connection even if a step below raises.
    with MongoClient(Config.MONGODB_URI) as client:
        rss_feeds_collection = client[Config.DB_NAME]["rss_feeds"]

        print("Fixing duplicate RSS feeds...")

        all_feeds = list(rss_feeds_collection.find())
        print(f"Total feeds found: {len(all_feeds)}")

        # .get() keeps a feed without 'name' from aborting the run before
        # any duplicate has been removed.
        duplicates = find_duplicate_feeds(all_feeds)
        for feed in duplicates:
            name = feed.get("name", "<unnamed>")
            print(f"  Duplicate found: {name} - {feed.get('url')}")

        if duplicates:
            duplicate_ids = [feed["_id"] for feed in duplicates]
            result = rss_feeds_collection.delete_many(
                {"_id": {"$in": duplicate_ids}}
            )
            print(f"Removed {result.deleted_count} duplicate feeds")
        else:
            print("No duplicates found")

        # Best effort: a failure here is reported and index creation is
        # still attempted, as in the original script.
        print("\nDropping conflicting indexes on 'url'...")
        try:
            dropped = drop_conflicting_url_indexes(rss_feeds_collection)
        except PyMongoError as exc:
            print(f"Note: {exc}")
        else:
            print(f"Indexes dropped: {', '.join(dropped) if dropped else 'none'}")

        print("\nCreating unique index on 'url' field...")
        rss_feeds_collection.create_index("url", unique=True)
        print("✓ Unique index created successfully")

        # Verify by re-reading the collection.
        remaining_feeds = list(rss_feeds_collection.find())
        print(f"\nFinal feed count: {len(remaining_feeds)}")
        print("\nRemaining feeds:")
        for feed in remaining_feeds:
            name = feed.get("name", "<unnamed>")
            print(f"  - {name}: {feed.get('url')}")

    print("\n✓ Done! Duplicates removed and unique index created.")
    print("You can now restart your Flask app.")


if __name__ == "__main__":
    main()