228 lines
8.4 KiB
Python
228 lines
8.4 KiB
Python
"""
|
|
Email tracking service for the Munich News Daily newsletter system.

Handles tracking ID generation, tracking record creation for newsletter
sends and article links, and anonymization/deletion of stored tracking data.
|
|
"""
|
|
|
|
import uuid
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional

from database import newsletter_sends_collection, link_clicks_collection, subscriber_activity_collection, subscribers_collection
|
|
|
|
|
|
def generate_tracking_id() -> str:
    """
    Produce a fresh, random identifier for a tracking record.

    Returns:
        str: The canonical hyphenated string form of a newly generated UUID4.
    """
    tracking_uuid = uuid.uuid4()
    return str(tracking_uuid)
|
|
|
|
|
|
def create_newsletter_tracking(
    newsletter_id: str,
    subscriber_email: str,
    article_links: Optional[List[Dict[str, str]]] = None
) -> Dict[str, Any]:
    """
    Create tracking records for a newsletter send.

    Inserts one record into newsletter_sends (used for email open tracking)
    and one record into link_clicks for each distinct article URL. If the
    subscriber's document has a falsy 'tracking_enabled' value, nothing is
    written and an "opted out" result is returned instead.

    Args:
        newsletter_id: Unique identifier for the newsletter batch (e.g., date-based)
        subscriber_email: Email address of the recipient
        article_links: Optional list of article dictionaries with 'url' and
            'title' keys. Entries without a 'url' are skipped; a URL that
            appears more than once is tracked only once (first entry wins).

    Returns:
        dict: Tracking information containing:
            - pixel_tracking_id: ID for the tracking pixel (None if opted out)
            - link_tracking_map: Dict mapping original URLs to tracking IDs (empty if opted out)
            - newsletter_id: The newsletter batch ID
            - subscriber_email: The recipient email
            - tracking_enabled: Boolean indicating if tracking is enabled for this subscriber
    """
    # A subscriber with no document, or no explicit preference, is tracked by default.
    subscriber = subscribers_collection.find_one({'email': subscriber_email})
    tracking_enabled = subscriber.get('tracking_enabled', True) if subscriber else True

    # Opted out: write nothing and hand back an empty tracking payload.
    if not tracking_enabled:
        return {
            'pixel_tracking_id': None,
            'link_tracking_map': {},
            'newsletter_id': newsletter_id,
            'subscriber_email': subscriber_email,
            'tracking_enabled': False
        }

    # Capture the time once so every record written for this send carries the
    # same timestamp (sent_at and created_at previously could differ slightly).
    # NOTE(review): datetime.utcnow() is deprecated since Python 3.12; it is kept
    # because switching to aware datetimes changes the values being stored.
    now = datetime.utcnow()

    # Tracking ID embedded in the email-open pixel.
    pixel_tracking_id = generate_tracking_id()

    newsletter_sends_collection.insert_one({
        'newsletter_id': newsletter_id,
        'subscriber_email': subscriber_email,
        'tracking_id': pixel_tracking_id,
        'sent_at': now,
        'opened': False,
        'first_opened_at': None,
        'last_opened_at': None,
        'open_count': 0,
        'created_at': now
    })

    link_tracking_map: Dict[str, str] = {}

    if article_links:
        # Import here to avoid circular dependency
        from database import articles_collection

        for article in article_links:
            article_url = article.get('url')

            # Skip entries with no URL. Also skip URLs already handled: the
            # returned map holds one tracking ID per URL, so a second record
            # for the same URL would be unreachable and could never be clicked.
            if not article_url or article_url in link_tracking_map:
                continue

            article_title = article.get('title', '')
            link_tracking_id = generate_tracking_id()

            # Look up article metadata (matched on its 'link' field) so the
            # click record carries category/keywords for personalization.
            article_doc = articles_collection.find_one({'link': article_url})
            if article_doc:
                article_id = str(article_doc['_id'])
                category = article_doc.get('category', 'general')
                keywords = article_doc.get('keywords', [])
            else:
                # Unknown article: fall back to neutral metadata.
                article_id = None
                category = 'general'
                keywords = []

            link_clicks_collection.insert_one({
                'tracking_id': link_tracking_id,
                'newsletter_id': newsletter_id,
                'subscriber_email': subscriber_email,
                'article_url': article_url,
                'article_title': article_title,
                'article_id': article_id,
                'category': category,
                'keywords': keywords,
                'clicked': False,
                'clicked_at': None,
                'user_agent': None,
                'created_at': now
            })

            # Map original URL to tracking ID
            link_tracking_map[article_url] = link_tracking_id

    return {
        'pixel_tracking_id': pixel_tracking_id,
        'link_tracking_map': link_tracking_map,
        'newsletter_id': newsletter_id,
        'subscriber_email': subscriber_email,
        'tracking_enabled': True
    }
|
|
|
|
|
|
|
|
def anonymize_old_tracking_data(retention_days: int = 90) -> Dict[str, int]:
    """
    Anonymize tracking data older than the specified retention period.

    Replaces the subscriber_email field with the literal 'anonymized' on
    newsletter_sends records (selected by sent_at) and link_clicks records
    (selected by created_at) that are older than the cutoff, and stamps them
    with anonymized_at. All other fields are left untouched, so aggregated
    metrics are preserved.

    Args:
        retention_days: Number of days to retain personal data (default: 90).
            Must not be negative.

    Returns:
        dict: Count of anonymized records for each collection:
            - newsletter_sends_anonymized: Number of newsletter send records anonymized
            - link_clicks_anonymized: Number of link click records anonymized
            - total_anonymized: Total number of records anonymized

    Raises:
        ValueError: If retention_days is negative. A negative value would put
            the cutoff in the future and anonymize every record, including
            ones that were only just created.
    """
    if retention_days < 0:
        raise ValueError(
            f"retention_days must be non-negative, got {retention_days!r}"
        )

    # Single timestamp: the cutoff and both anonymized_at stamps are derived
    # from the same instant, so one run is consistent across collections.
    now = datetime.utcnow()
    cutoff_date = now - timedelta(days=retention_days)

    anonymize_update = {
        '$set': {
            'subscriber_email': 'anonymized',
            'anonymized_at': now
        }
    }

    # Anonymize newsletter_sends records
    newsletter_result = newsletter_sends_collection.update_many(
        {
            'sent_at': {'$lt': cutoff_date},
            'subscriber_email': {'$ne': 'anonymized'}  # Don't re-anonymize
        },
        anonymize_update
    )

    # Anonymize link_clicks records
    link_clicks_result = link_clicks_collection.update_many(
        {
            'created_at': {'$lt': cutoff_date},
            'subscriber_email': {'$ne': 'anonymized'}  # Don't re-anonymize
        },
        anonymize_update
    )

    newsletter_count = newsletter_result.modified_count
    link_clicks_count = link_clicks_result.modified_count

    return {
        'newsletter_sends_anonymized': newsletter_count,
        'link_clicks_anonymized': link_clicks_count,
        'total_anonymized': newsletter_count + link_clicks_count
    }
|
|
|
|
|
|
def delete_subscriber_tracking_data(subscriber_email: str) -> Dict[str, int]:
    """
    Delete all tracking data for a specific subscriber.

    Removes every record matching the subscriber's email address from the
    newsletter_sends, link_clicks and subscriber_activity collections.
    This supports GDPR right to be forgotten.

    Args:
        subscriber_email: Email address of the subscriber. If empty or None,
            nothing is deleted and all counts are zero.

    Returns:
        dict: Count of deleted records for each collection:
            - newsletter_sends_deleted: Number of newsletter send records deleted
            - link_clicks_deleted: Number of link click records deleted
            - subscriber_activity_deleted: Number of activity records deleted
            - total_deleted: Total number of records deleted
    """
    # Guard against a missing email: in MongoDB an equality filter on None
    # matches documents where the field is null *or absent*, so passing None
    # through could delete records that belong to nobody in particular.
    if not subscriber_email:
        return {
            'newsletter_sends_deleted': 0,
            'link_clicks_deleted': 0,
            'subscriber_activity_deleted': 0,
            'total_deleted': 0
        }

    # Delete from newsletter_sends
    newsletter_result = newsletter_sends_collection.delete_many({
        'subscriber_email': subscriber_email
    })

    # Delete from link_clicks
    link_clicks_result = link_clicks_collection.delete_many({
        'subscriber_email': subscriber_email
    })

    # Delete from subscriber_activity (this collection keys on 'email')
    activity_result = subscriber_activity_collection.delete_many({
        'email': subscriber_email
    })

    newsletter_count = newsletter_result.deleted_count
    link_clicks_count = link_clicks_result.deleted_count
    activity_count = activity_result.deleted_count

    return {
        'newsletter_sends_deleted': newsletter_count,
        'link_clicks_deleted': link_clicks_count,
        'subscriber_activity_deleted': activity_count,
        'total_deleted': newsletter_count + link_clicks_count + activity_count
    }
|