"""
|
|
Script to fix duplicate RSS feeds and create unique index
|
|
Run this once: python fix_duplicates.py
|
|
"""

from pymongo import MongoClient
from config import Config

# Connect to MongoDB
client = MongoClient(Config.MONGODB_URI)
db = client[Config.DB_NAME]
rss_feeds_collection = db['rss_feeds']

print("Fixing duplicate RSS feeds...")

# Get all feeds
all_feeds = list(rss_feeds_collection.find())
print(f"Total feeds found: {len(all_feeds)}")

# Find duplicates by URL
seen_urls = {}
duplicates_to_remove = []

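# One pass over the feeds: the first document seen for a URL is kept, and the _id of
# every later document with the same URL is queued for deletion.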
for feed in all_feeds:
    url = feed.get('url')
    if url in seen_urls:
        # This is a duplicate, mark for removal
        duplicates_to_remove.append(feed['_id'])
        print(f"  Duplicate found: {feed['name']} - {url}")
    else:
        # First occurrence, keep it
        seen_urls[url] = feed['_id']

# Remove duplicates
if duplicates_to_remove:
    result = rss_feeds_collection.delete_many({'_id': {'$in': duplicates_to_remove}})
    print(f"Removed {result.deleted_count} duplicate feeds")
else:
    print("No duplicates found")

# Drop existing indexes (if any)
print("\nDropping existing indexes...")
try:
    rss_feeds_collection.drop_indexes()
    print("Indexes dropped")
except Exception as e:
    print(f"Note: {e}")

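# Note: drop_indexes() removes secondary indexes only; MongoDB always keeps the
# built-in _id index, so the collection is back to its default state at this point.
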
# Create unique index on URL
print("\nCreating unique index on 'url' field...")
rss_feeds_collection.create_index('url', unique=True)
print("✓ Unique index created successfully")

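# With the unique index in place, any later insert of a feed whose 'url' already exists
# raises pymongo.errors.DuplicateKeyError, so the code that adds feeds (presumably in
# the Flask app mentioned below) needs to catch that error or avoid duplicate inserts.
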
# Verify
remaining_feeds = list(rss_feeds_collection.find())
print(f"\nFinal feed count: {len(remaining_feeds)}")
print("\nRemaining feeds:")
for feed in remaining_feeds:
    print(f"  - {feed['name']}: {feed['url']}")

print("\n✓ Done! Duplicates removed and unique index created.")
print("You can now restart your Flask app.")