"""MongoDB connection, collection handles, and index/bootstrap helpers.

Importing this module creates a ``MongoClient`` from ``Config.MONGODB_URI``
and binds one module-level handle per collection. Call ``init_db()`` to
create the indexes and seed the default RSS feeds.
"""
from datetime import datetime, timezone

from pymongo import MongoClient

from config import Config

# MongoDB setup
client = MongoClient(Config.MONGODB_URI)
db = client[Config.DB_NAME]

# Collections
articles_collection = db['articles']
subscribers_collection = db['subscribers']
rss_feeds_collection = db['rss_feeds']

# Tracking collections
newsletter_sends_collection = db['newsletter_sends']
link_clicks_collection = db['link_clicks']
subscriber_activity_collection = db['subscriber_activity']


def init_db():
    """Create all indexes and seed default RSS feeds if none exist.

    Steps, in order:
      1. Create indexes on the articles, subscribers and RSS feed collections.
      2. Create the tracking-collection indexes via
         ``init_tracking_collections()``.
      3. If the RSS feed collection contains no documents, insert the three
         default Munich feeds.

    Progress messages are written to stdout with ``print``.
    """
    # Unique index on article links prevents duplicate articles.
    articles_collection.create_index('link', unique=True)
    # Index on created_at for faster sorting.
    articles_collection.create_index('created_at')

    # Unique index on subscriber emails.
    subscribers_collection.create_index('email', unique=True)
    subscribers_collection.create_index('subscribed_at')

    # Unique index on RSS feed URLs.
    rss_feeds_collection.create_index('url', unique=True)

    # Initialize tracking collections indexes.
    init_tracking_collections()

    # Seed default RSS feeds only when the collection is completely empty,
    # so feeds that were removed on purpose are not re-added while other
    # feeds remain.
    if rss_feeds_collection.count_documents({}) == 0:
        # Timezone-aware replacement for the deprecated datetime.utcnow();
        # one timestamp is shared by the whole seed batch.
        seeded_at = datetime.now(timezone.utc)
        default_feeds = [
            {
                'name': 'Süddeutsche Zeitung München',
                'url': 'https://www.sueddeutsche.de/muenchen/rss',
                'active': True,
                'created_at': seeded_at,
            },
            {
                'name': 'Münchner Merkur',
                'url': 'https://www.merkur.de/muenchen/rss',
                'active': True,
                'created_at': seeded_at,
            },
            {
                'name': 'Abendzeitung München',
                'url': 'https://www.abendzeitung-muenchen.de/rss',
                'active': True,
                'created_at': seeded_at,
            },
        ]
        rss_feeds_collection.insert_many(default_feeds)
        print(f"Initialized {len(default_feeds)} default RSS feeds")

    print("Database initialized with indexes")


def init_tracking_collections():
    """Create the indexes used by the email tracking collections.

    Covers the newsletter sends, link clicks and subscriber activity
    collections. Prints a confirmation message to stdout when done.
    """
    # Newsletter sends: unique tracking_id for fast pixel/click lookups,
    # plus secondary indexes for analytics, per-user and time-based queries.
    newsletter_sends_collection.create_index('tracking_id', unique=True)
    newsletter_sends_collection.create_index('newsletter_id')
    newsletter_sends_collection.create_index('subscriber_email')
    newsletter_sends_collection.create_index('sent_at')

    # Link clicks: unique tracking_id for fast redirect lookups, plus
    # indexes for analytics, article performance and per-user queries.
    link_clicks_collection.create_index('tracking_id', unique=True)
    link_clicks_collection.create_index('newsletter_id')
    link_clicks_collection.create_index('article_url')
    link_clicks_collection.create_index('subscriber_email')

    # Subscriber activity: one document per email (unique), filterable by
    # activity status and by last open time.
    subscriber_activity_collection.create_index('email', unique=True)
    subscriber_activity_collection.create_index('status')
    subscriber_activity_collection.create_index('last_opened_at')

    print("Tracking collections initialized with indexes")