"""Email tracking service for the Munich News Daily newsletter system.

Handles tracking ID generation, creation of open/click tracking records,
and privacy maintenance (anonymization and deletion) of tracking data.
"""

import uuid
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional

from database import (
    link_clicks_collection,
    newsletter_sends_collection,
    subscriber_activity_collection,
    subscribers_collection,
)

# Sentinel written into ``subscriber_email`` once a record has been anonymized.
_ANONYMIZED_EMAIL = 'anonymized'


def generate_tracking_id() -> str:
    """Generate a unique tracking ID.

    Returns:
        str: The string form of a freshly generated UUID4.
    """
    return str(uuid.uuid4())


def create_newsletter_tracking(
    newsletter_id: str,
    subscriber_email: str,
    article_links: Optional[List[Dict[str, str]]] = None
) -> Dict[str, Any]:
    """Create tracking records for a newsletter send.

    Inserts one record into ``newsletter_sends_collection`` (email open
    tracking) and one record into ``link_clicks_collection`` for every
    article that has a non-empty ``'url'``. No records are written when the
    subscriber document has a falsy ``tracking_enabled`` value. Subscribers
    that are not found, or that have no ``tracking_enabled`` field, are
    treated as tracking-enabled.

    Args:
        newsletter_id: Unique identifier for the newsletter batch
            (e.g., date-based).
        subscriber_email: Email address of the recipient.
        article_links: Optional list of article dictionaries with 'url' and
            'title' keys. Entries without a 'url' are skipped; a missing
            'title' is stored as an empty string.

    Returns:
        dict: Tracking information containing:
            - pixel_tracking_id: ID for the tracking pixel (None if opted out)
            - link_tracking_map: Dict mapping original URLs to tracking IDs
              (empty if opted out). If the same URL appears more than once,
              the map keeps the tracking ID of its last occurrence.
            - newsletter_id: The newsletter batch ID
            - subscriber_email: The recipient email
            - tracking_enabled: Whether tracking is enabled for this
              subscriber
    """
    # Check if the subscriber has opted out of tracking.
    subscriber = subscribers_collection.find_one({'email': subscriber_email})
    tracking_enabled = (
        subscriber.get('tracking_enabled', True) if subscriber else True
    )

    # If tracking is disabled, return empty tracking data and write nothing.
    if not tracking_enabled:
        return {
            'pixel_tracking_id': None,
            'link_tracking_map': {},
            'newsletter_id': newsletter_id,
            'subscriber_email': subscriber_email,
            'tracking_enabled': False
        }

    # One timestamp for the whole send so sent_at/created_at agree exactly.
    # Kept as naive UTC, matching what this module already stores.
    now = datetime.utcnow()

    # Tracking ID for the email open pixel.
    pixel_tracking_id = generate_tracking_id()

    newsletter_sends_collection.insert_one({
        'newsletter_id': newsletter_id,
        'subscriber_email': subscriber_email,
        'tracking_id': pixel_tracking_id,
        'sent_at': now,
        'opened': False,
        'first_opened_at': None,
        'last_opened_at': None,
        'open_count': 0,
        'created_at': now
    })

    # Create one click-tracking record per article link.
    link_tracking_map: Dict[str, str] = {}
    for article in article_links or []:
        article_url = article.get('url')
        if not article_url:
            # Nothing to track for an article without a URL.
            continue

        link_tracking_id = generate_tracking_id()
        link_clicks_collection.insert_one({
            'tracking_id': link_tracking_id,
            'newsletter_id': newsletter_id,
            'subscriber_email': subscriber_email,
            'article_url': article_url,
            'article_title': article.get('title', ''),
            'clicked': False,
            'clicked_at': None,
            'user_agent': None,
            'created_at': now
        })

        # Map original URL to its tracking ID.
        link_tracking_map[article_url] = link_tracking_id

    return {
        'pixel_tracking_id': pixel_tracking_id,
        'link_tracking_map': link_tracking_map,
        'newsletter_id': newsletter_id,
        'subscriber_email': subscriber_email,
        'tracking_enabled': True
    }


def _anonymize_records_before(
    collection: Any,
    date_field: str,
    cutoff_date: datetime,
    anonymized_at: datetime
) -> int:
    """Anonymize records in ``collection`` whose ``date_field`` precedes the cutoff.

    Returns the ``modified_count`` reported by ``update_many``.
    """
    result = collection.update_many(
        {
            date_field: {'$lt': cutoff_date},
            # Don't re-anonymize records that were already processed.
            'subscriber_email': {'$ne': _ANONYMIZED_EMAIL}
        },
        {
            '$set': {
                'subscriber_email': _ANONYMIZED_EMAIL,
                'anonymized_at': anonymized_at
            }
        }
    )
    return result.modified_count


def anonymize_old_tracking_data(retention_days: int = 90) -> Dict[str, int]:
    """Anonymize tracking data older than the specified retention period.

    Replaces ``subscriber_email`` with the literal ``'anonymized'`` and sets
    ``anonymized_at`` on matching records; all other fields are left in
    place. Newsletter send records are aged by ``sent_at``, link click
    records by ``created_at``.

    Args:
        retention_days: Number of days to retain personal data (default: 90).
            Must not be negative.

    Returns:
        dict: Count of anonymized records for each collection:
            - newsletter_sends_anonymized: Number of newsletter send records
              anonymized
            - link_clicks_anonymized: Number of link click records anonymized
            - total_anonymized: Total number of records anonymized

    Raises:
        ValueError: If ``retention_days`` is negative. A negative value would
            place the cutoff in the future and anonymize every record,
            including ones that were only just created.
    """
    if retention_days < 0:
        raise ValueError('retention_days must not be negative')

    # Single timestamp so the cutoff and anonymized_at are consistent.
    now = datetime.utcnow()
    cutoff_date = now - timedelta(days=retention_days)

    newsletter_count = _anonymize_records_before(
        newsletter_sends_collection, 'sent_at', cutoff_date, now
    )
    link_clicks_count = _anonymize_records_before(
        link_clicks_collection, 'created_at', cutoff_date, now
    )

    return {
        'newsletter_sends_anonymized': newsletter_count,
        'link_clicks_anonymized': link_clicks_count,
        'total_anonymized': newsletter_count + link_clicks_count
    }


def delete_subscriber_tracking_data(subscriber_email: str) -> Dict[str, int]:
    """Delete all tracking data for a specific subscriber.

    Removes every record matching the subscriber's email address from the
    newsletter sends, link clicks and subscriber activity collections.
    This supports the GDPR right to be forgotten.

    Args:
        subscriber_email: Email address of the subscriber. Must be a
            non-empty string.

    Returns:
        dict: Count of deleted records for each collection:
            - newsletter_sends_deleted: Number of newsletter send records
              deleted
            - link_clicks_deleted: Number of link click records deleted
            - subscriber_activity_deleted: Number of activity records deleted
            - total_deleted: Total number of records deleted

    Raises:
        ValueError: If ``subscriber_email`` is not a non-empty string.
    """
    # Guard the filters: in MongoDB an equality match on None also matches
    # documents where the field is missing, so a bad argument could delete
    # records that belong to no particular subscriber.
    if not isinstance(subscriber_email, str) or not subscriber_email.strip():
        raise ValueError('subscriber_email must be a non-empty string')

    newsletter_count = newsletter_sends_collection.delete_many({
        'subscriber_email': subscriber_email
    }).deleted_count

    link_clicks_count = link_clicks_collection.delete_many({
        'subscriber_email': subscriber_email
    }).deleted_count

    # The activity collection keys the address under 'email'.
    activity_count = subscriber_activity_collection.delete_many({
        'email': subscriber_email
    }).deleted_count

    return {
        'newsletter_sends_deleted': newsletter_count,
        'link_clicks_deleted': link_clicks_count,
        'subscriber_activity_deleted': activity_count,
        'total_deleted': newsletter_count + link_clicks_count + activity_count
    }