This commit is contained in:
2025-11-11 14:09:21 +01:00
parent bcd0a10576
commit 1075a91eac
57 changed files with 5598 additions and 1366 deletions

View File

@@ -0,0 +1,215 @@
"""
Email tracking service for Munich News Daily newsletter system.
Handles tracking ID generation, tracking record creation, and privacy maintenance (anonymization and deletion of stored tracking data).
"""
import uuid
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional

from database import (
    link_clicks_collection,
    newsletter_sends_collection,
    subscriber_activity_collection,
    subscribers_collection,
)
def generate_tracking_id() -> str:
    """
    Generate a unique tracking ID using UUID4.

    Returns:
        str: A random UUID4 rendered in its canonical 36-character,
            hyphen-separated string form.
    """
    return str(uuid.uuid4())
def create_newsletter_tracking(
    newsletter_id: str,
    subscriber_email: str,
    article_links: Optional[List[Dict[str, str]]] = None
) -> Dict[str, Any]:
    """
    Create tracking records for a newsletter send.

    Creates one record in the newsletter_sends collection (used for email
    open tracking) and one record in the link_clicks collection for each
    distinct article URL. Respects subscriber opt-out preferences: when the
    subscriber has ``tracking_enabled`` set to a falsy value, nothing is
    written to the database.

    Args:
        newsletter_id: Unique identifier for the newsletter batch (e.g., date-based)
        subscriber_email: Email address of the recipient
        article_links: Optional list of article dictionaries with 'url' and
            'title' keys. Entries without a 'url' are skipped; a URL that
            appears more than once is tracked once (first occurrence wins).

    Returns:
        dict: Tracking information containing:
            - pixel_tracking_id: ID for the tracking pixel (None if opted out)
            - link_tracking_map: Dict mapping original URLs to tracking IDs (empty if opted out)
            - newsletter_id: The newsletter batch ID
            - subscriber_email: The recipient email
            - tracking_enabled: Boolean indicating if tracking is enabled for this subscriber
    """
    # Check if subscriber has opted out of tracking. A subscriber that is not
    # found, or has no explicit preference, is treated as tracking-enabled.
    subscriber = subscribers_collection.find_one({'email': subscriber_email})
    tracking_enabled = subscriber.get('tracking_enabled', True) if subscriber else True

    # If tracking is disabled, return empty tracking data without writing anything
    if not tracking_enabled:
        return {
            'pixel_tracking_id': None,
            'link_tracking_map': {},
            'newsletter_id': newsletter_id,
            'subscriber_email': subscriber_email,
            'tracking_enabled': False
        }

    # Take the timestamp once so every record belonging to this send carries
    # exactly the same sent/created time.
    now = datetime.utcnow()

    # Generate tracking ID for the email open pixel and store the send record
    pixel_tracking_id = generate_tracking_id()
    newsletter_sends_collection.insert_one({
        'newsletter_id': newsletter_id,
        'subscriber_email': subscriber_email,
        'tracking_id': pixel_tracking_id,
        'sent_at': now,
        'opened': False,
        'first_opened_at': None,
        'last_opened_at': None,
        'open_count': 0,
        'created_at': now
    })

    # Build the tracking records for article links
    link_tracking_map: Dict[str, str] = {}
    link_click_docs: List[Dict[str, Any]] = []
    for article in article_links or []:
        article_url = article.get('url')
        # Skip entries without a URL. Also skip repeated URLs: the map can
        # hold only one tracking ID per URL, so a second record would be an
        # orphan whose ID is never handed back to the caller.
        if not article_url or article_url in link_tracking_map:
            continue

        link_tracking_id = generate_tracking_id()
        link_click_docs.append({
            'tracking_id': link_tracking_id,
            'newsletter_id': newsletter_id,
            'subscriber_email': subscriber_email,
            'article_url': article_url,
            'article_title': article.get('title', ''),
            'clicked': False,
            'clicked_at': None,
            'user_agent': None,
            'created_at': now
        })
        # Map original URL to tracking ID
        link_tracking_map[article_url] = link_tracking_id

    # Write all link records in one round trip; insert_many rejects an empty
    # list, hence the guard.
    if link_click_docs:
        link_clicks_collection.insert_many(link_click_docs)

    return {
        'pixel_tracking_id': pixel_tracking_id,
        'link_tracking_map': link_tracking_map,
        'newsletter_id': newsletter_id,
        'subscriber_email': subscriber_email,
        'tracking_enabled': True
    }
def anonymize_old_tracking_data(retention_days: int = 90) -> Dict[str, int]:
    """
    Anonymize tracking data older than the specified retention period.

    Replaces the subscriber email with the literal 'anonymized' on
    newsletter_sends records (selected by 'sent_at') and link_clicks records
    (selected by 'created_at') that are older than the cutoff, and stamps
    them with 'anonymized_at'. All other fields are left in place so
    aggregated metrics are preserved. Records that are already anonymized
    are not touched again.

    Args:
        retention_days: Number of days to retain personal data (default: 90).
            Must not be negative.

    Returns:
        dict: Count of anonymized records for each collection:
            - newsletter_sends_anonymized: Number of newsletter send records anonymized
            - link_clicks_anonymized: Number of link click records anonymized
            - total_anonymized: Total number of records anonymized

    Raises:
        ValueError: If retention_days is negative. A negative value would put
            the cutoff in the future and irreversibly anonymize every record.
    """
    if retention_days < 0:
        raise ValueError('retention_days must be non-negative')

    # One timestamp drives both the cutoff and the anonymized_at stamp, so the
    # two collections are processed against exactly the same point in time.
    now = datetime.utcnow()
    cutoff_date = now - timedelta(days=retention_days)

    anonymize_update = {
        '$set': {
            'subscriber_email': 'anonymized',
            'anonymized_at': now
        }
    }

    # Anonymize newsletter_sends records
    newsletter_result = newsletter_sends_collection.update_many(
        {
            'sent_at': {'$lt': cutoff_date},
            'subscriber_email': {'$ne': 'anonymized'}  # Don't re-anonymize
        },
        anonymize_update
    )

    # Anonymize link_clicks records
    link_clicks_result = link_clicks_collection.update_many(
        {
            'created_at': {'$lt': cutoff_date},
            'subscriber_email': {'$ne': 'anonymized'}  # Don't re-anonymize
        },
        anonymize_update
    )

    newsletter_count = newsletter_result.modified_count
    link_clicks_count = link_clicks_result.modified_count

    return {
        'newsletter_sends_anonymized': newsletter_count,
        'link_clicks_anonymized': link_clicks_count,
        'total_anonymized': newsletter_count + link_clicks_count
    }
def delete_subscriber_tracking_data(subscriber_email: str) -> Dict[str, int]:
    """
    Delete all tracking data for a specific subscriber.

    Removes every record matching the subscriber's email address from the
    newsletter_sends and link_clicks collections (field 'subscriber_email')
    and from the subscriber_activity collection (field 'email'). This
    supports GDPR right to be forgotten.

    Args:
        subscriber_email: Email address of the subscriber

    Returns:
        dict: Count of deleted records for each collection:
            - newsletter_sends_deleted: Number of newsletter send records deleted
            - link_clicks_deleted: Number of link click records deleted
            - subscriber_activity_deleted: Number of activity records deleted
            - total_deleted: Total number of records deleted

    Raises:
        ValueError: If subscriber_email is not a non-empty string, or is the
            'anonymized' placeholder. Nothing is deleted in that case.
    """
    # Refuse values that would make the delete filters match unrelated
    # records: in MongoDB a None filter value matches documents where the
    # field is null or missing.
    if not isinstance(subscriber_email, str) or not subscriber_email.strip():
        raise ValueError('subscriber_email must be a non-empty string')

    # 'anonymized' is the placeholder written into subscriber_email by
    # anonymize_old_tracking_data; deleting by it would wipe every anonymized
    # record that is kept for aggregated metrics.
    if subscriber_email == 'anonymized':
        raise ValueError("subscriber_email must not be the 'anonymized' placeholder")

    # Delete from newsletter_sends
    newsletter_count = newsletter_sends_collection.delete_many({
        'subscriber_email': subscriber_email
    }).deleted_count

    # Delete from link_clicks
    link_clicks_count = link_clicks_collection.delete_many({
        'subscriber_email': subscriber_email
    }).deleted_count

    # Delete from subscriber_activity (keyed by 'email', not 'subscriber_email')
    activity_count = subscriber_activity_collection.delete_many({
        'email': subscriber_email
    }).deleted_count

    return {
        'newsletter_sends_deleted': newsletter_count,
        'link_clicks_deleted': link_clicks_count,
        'subscriber_activity_deleted': activity_count,
        'total_deleted': newsletter_count + link_clicks_count + activity_count
    }