This commit is contained in:
2025-11-11 14:09:21 +01:00
parent bcd0a10576
commit 1075a91eac
57 changed files with 5598 additions and 1366 deletions

View File

@@ -0,0 +1,306 @@
"""
Analytics service for email tracking metrics and subscriber engagement.
Calculates open rates, click rates, and subscriber activity status.
"""
from datetime import datetime, timedelta
from typing import Dict, Optional
from database import (
newsletter_sends_collection,
link_clicks_collection,
subscriber_activity_collection
)
def get_open_rate(newsletter_id: str) -> float:
    """
    Return the share of send records for a newsletter that are marked opened.

    Args:
        newsletter_id: Unique identifier for the newsletter batch

    Returns:
        float: Open rate as a percentage rounded to two decimals, or 0.0
            when no send records exist for the newsletter
    """
    batch_filter = {'newsletter_id': newsletter_id}

    sent = newsletter_sends_collection.count_documents(batch_filter)
    if not sent:
        # Without any send records there is no ratio to compute.
        return 0.0

    opened = newsletter_sends_collection.count_documents(
        {**batch_filter, 'opened': True}
    )
    return round(opened / sent * 100, 2)
def get_click_rate(article_url: str) -> float:
    """
    Return the share of link tracking records for an article marked clicked.

    Args:
        article_url: The original article URL

    Returns:
        float: Click rate as a percentage rounded to two decimals, or 0.0
            when no link tracking records exist for the URL
    """
    article_filter = {'article_url': article_url}

    tracked = link_clicks_collection.count_documents(article_filter)
    if not tracked:
        # Without any tracking records there is no ratio to compute.
        return 0.0

    clicked = link_clicks_collection.count_documents(
        {**article_filter, 'clicked': True}
    )
    return round(clicked / tracked * 100, 2)
def get_newsletter_metrics(newsletter_id: str) -> Dict:
    """
    Collect send, open and click metrics for one newsletter batch.

    Args:
        newsletter_id: Unique identifier for the newsletter batch

    Returns:
        dict: Dictionary containing:
            - newsletter_id: The newsletter ID
            - total_sent: Number of send records for the newsletter
            - total_opened: Number of send records marked opened
            - open_rate: total_opened / total_sent as a percentage
            - total_clicks: Number of link records marked clicked
            - unique_clickers: Number of distinct subscriber emails with a click
            - click_through_rate: unique_clickers / total_sent as a percentage
        Both rates are rounded to two decimals and are 0.0 when nothing
        was sent.
    """
    sends_filter = {'newsletter_id': newsletter_id}
    opened_filter = {'newsletter_id': newsletter_id, 'opened': True}
    clicked_filter = {'newsletter_id': newsletter_id, 'clicked': True}

    total_sent = newsletter_sends_collection.count_documents(sends_filter)
    total_opened = newsletter_sends_collection.count_documents(opened_filter)
    total_clicks = link_clicks_collection.count_documents(clicked_filter)

    # A subscriber who clicked several links is counted once here.
    clicker_emails = link_clicks_collection.distinct(
        'subscriber_email', clicked_filter
    )
    unique_clickers = len(clicker_emails)

    def percent_of_sent(part: int) -> float:
        # Guard against division by zero when the newsletter has no sends.
        if total_sent > 0:
            return round(part / total_sent * 100, 2)
        return 0.0

    return {
        'newsletter_id': newsletter_id,
        'total_sent': total_sent,
        'total_opened': total_opened,
        'open_rate': percent_of_sent(total_opened),
        'total_clicks': total_clicks,
        'unique_clickers': unique_clickers,
        'click_through_rate': percent_of_sent(unique_clickers)
    }
def get_article_performance(article_url: str) -> Dict:
    """
    Collect click metrics for one article across every newsletter.

    Args:
        article_url: The original article URL

    Returns:
        dict: Dictionary containing:
            - article_url: The article URL
            - total_sent: Number of link tracking records for the article
            - total_clicks: Number of those records marked clicked
            - click_rate: total_clicks / total_sent as a percentage,
              rounded to two decimals (0.0 when total_sent is 0)
            - unique_clickers: Number of distinct subscriber emails with a click
            - newsletters: Distinct newsletter IDs that carried the article
    """
    any_record = {'article_url': article_url}
    clicked_record = {'article_url': article_url, 'clicked': True}

    total_sent = link_clicks_collection.count_documents(any_record)
    total_clicks = link_clicks_collection.count_documents(clicked_record)

    if total_sent > 0:
        click_rate = total_clicks / total_sent * 100
    else:
        # No tracking records: avoid dividing by zero.
        click_rate = 0.0

    clicker_emails = link_clicks_collection.distinct(
        'subscriber_email', clicked_record
    )
    newsletter_ids = link_clicks_collection.distinct(
        'newsletter_id', any_record
    )

    return {
        'article_url': article_url,
        'total_sent': total_sent,
        'total_clicks': total_clicks,
        'click_rate': round(click_rate, 2),
        'unique_clickers': len(clicker_emails),
        'newsletters': newsletter_ids
    }
def get_subscriber_activity_status(email: str) -> str:
    """
    Get the activity status for a specific subscriber.

    Classifies subscribers based on their most recent email open:
        - 'active': Last open was at most 30 days ago
        - 'inactive': Last open was 31 to 60 days ago
        - 'dormant': Last open was more than 60 days ago
        - 'new': No recorded open (this covers subscribers who never
          received a newsletter, never opened one, or whose opened
          record carries no last_opened_at timestamp)

    Args:
        email: Subscriber email address

    Returns:
        str: Activity status ('active', 'inactive', 'dormant', or 'new')
    """
    # Find the most recent open for this subscriber
    most_recent_open = newsletter_sends_collection.find_one(
        {'subscriber_email': email, 'opened': True},
        sort=[('last_opened_at', -1)]
    )

    # Every "no usable open timestamp" case maps to 'new', so no further
    # query is needed to tell "never received" from "never opened".
    last_opened_at = most_recent_open.get('last_opened_at') if most_recent_open else None
    if not last_opened_at:
        return 'new'

    # NOTE(review): the subtraction assumes last_opened_at is a naive UTC
    # datetime like the value returned by datetime.utcnow() — confirm.
    days_since_open = (datetime.utcnow() - last_opened_at).days

    # Classify based on whole days since the last open
    if days_since_open <= 30:
        return 'active'
    if days_since_open <= 60:
        return 'inactive'
    return 'dormant'
def update_subscriber_activity_statuses() -> int:
    """
    Recompute and store the activity record of every known subscriber.

    For each distinct subscriber_email found in the newsletter sends
    collection, the status, last open/click timestamps and engagement
    counters are recalculated and upserted into the subscriber_activity
    collection, keyed by email.

    Returns:
        int: Number of subscriber emails processed (one upsert each)
    """
    # NOTE(review): distinct() yields every stored subscriber_email value;
    # if records are ever rewritten to a placeholder address, that
    # placeholder is processed like a regular subscriber — confirm intent.
    subscriber_emails = newsletter_sends_collection.distinct('subscriber_email')

    updated_count = 0
    for updated_count, email in enumerate(subscriber_emails, start=1):
        any_send = {'subscriber_email': email}
        opened_send = {'subscriber_email': email, 'opened': True}
        clicked_link = {'subscriber_email': email, 'clicked': True}

        status = get_subscriber_activity_status(email)

        # Latest open and click events; either may be missing.
        latest_open = newsletter_sends_collection.find_one(
            opened_send,
            sort=[('last_opened_at', -1)]
        )
        if latest_open:
            last_opened_at = latest_open.get('last_opened_at')
        else:
            last_opened_at = None

        latest_click = link_clicks_collection.find_one(
            clicked_link,
            sort=[('clicked_at', -1)]
        )
        if latest_click:
            last_clicked_at = latest_click.get('clicked_at')
        else:
            last_clicked_at = None

        # Engagement counters
        total_opens = newsletter_sends_collection.count_documents(opened_send)
        total_clicks = link_clicks_collection.count_documents(clicked_link)
        newsletters_received = newsletter_sends_collection.count_documents(any_send)
        opened_newsletter_ids = newsletter_sends_collection.distinct(
            'newsletter_id', opened_send
        )

        activity_fields = {
            'email': email,
            'status': status,
            'last_opened_at': last_opened_at,
            'last_clicked_at': last_clicked_at,
            'total_opens': total_opens,
            'total_clicks': total_clicks,
            'newsletters_received': newsletters_received,
            'newsletters_opened': len(opened_newsletter_ids),
            'updated_at': datetime.utcnow()
        }

        # Create the activity record when it does not exist yet.
        subscriber_activity_collection.update_one(
            {'email': email},
            {'$set': activity_fields},
            upsert=True
        )

    return updated_count

View File

@@ -0,0 +1,215 @@
"""
Email tracking service for Munich News Daily newsletter system.
Handles tracking ID generation and tracking record creation.
"""
import uuid
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional

from database import (
    link_clicks_collection,
    newsletter_sends_collection,
    subscriber_activity_collection,
    subscribers_collection,
)
def generate_tracking_id() -> str:
    """
    Create a fresh tracking identifier.

    Returns:
        str: The string form of a newly generated UUID4
    """
    tracking_uuid = uuid.uuid4()
    return str(tracking_uuid)
def create_newsletter_tracking(
    newsletter_id: str,
    subscriber_email: str,
    article_links: Optional[List[Dict[str, str]]] = None
) -> Dict[str, Any]:
    """
    Create tracking records for a newsletter send.

    Creates a tracking record in newsletter_sends collection for email open
    tracking, and one tracking record in link_clicks collection per distinct
    article URL. Respects subscriber opt-out preferences: when the
    subscriber's 'tracking_enabled' flag is falsy, nothing is written.
    Subscribers that are unknown or have no flag are tracked by default.

    Args:
        newsletter_id: Unique identifier for the newsletter batch (e.g., date-based)
        subscriber_email: Email address of the recipient
        article_links: Optional list of article dictionaries with 'url' and
            'title' keys. Entries without a URL are skipped; when a URL
            appears more than once only its first entry is tracked.

    Returns:
        dict: Tracking information containing:
            - pixel_tracking_id: ID for the tracking pixel (None if opted out)
            - link_tracking_map: Dict mapping original URLs to tracking IDs (empty if opted out)
            - newsletter_id: The newsletter batch ID
            - subscriber_email: The recipient email
            - tracking_enabled: Boolean indicating if tracking is enabled for this subscriber
    """
    # Check if subscriber has opted out of tracking
    subscriber = subscribers_collection.find_one({'email': subscriber_email})
    tracking_enabled = subscriber.get('tracking_enabled', True) if subscriber else True

    # If tracking is disabled, return empty tracking data without writing
    if not tracking_enabled:
        return {
            'pixel_tracking_id': None,
            'link_tracking_map': {},
            'newsletter_id': newsletter_id,
            'subscriber_email': subscriber_email,
            'tracking_enabled': False
        }

    # One timestamp for every record of this send, so sent_at and all
    # created_at values agree exactly.
    now = datetime.utcnow()

    # Create newsletter send tracking record (email open pixel)
    pixel_tracking_id = generate_tracking_id()
    newsletter_sends_collection.insert_one({
        'newsletter_id': newsletter_id,
        'subscriber_email': subscriber_email,
        'tracking_id': pixel_tracking_id,
        'sent_at': now,
        'opened': False,
        'first_opened_at': None,
        'last_opened_at': None,
        'open_count': 0,
        'created_at': now
    })

    # Create tracking records for article links
    link_tracking_map: Dict[str, str] = {}
    for article in article_links or []:
        article_url = article.get('url')
        # The returned map holds one tracking ID per URL, so a second record
        # for the same URL could never be clicked and would only inflate
        # the click-rate denominator.
        if not article_url or article_url in link_tracking_map:
            continue

        link_tracking_id = generate_tracking_id()
        link_clicks_collection.insert_one({
            'tracking_id': link_tracking_id,
            'newsletter_id': newsletter_id,
            'subscriber_email': subscriber_email,
            'article_url': article_url,
            'article_title': article.get('title', ''),
            'clicked': False,
            'clicked_at': None,
            'user_agent': None,
            'created_at': now
        })
        # Map original URL to tracking ID
        link_tracking_map[article_url] = link_tracking_id

    return {
        'pixel_tracking_id': pixel_tracking_id,
        'link_tracking_map': link_tracking_map,
        'newsletter_id': newsletter_id,
        'subscriber_email': subscriber_email,
        'tracking_enabled': True
    }
def anonymize_old_tracking_data(retention_days: int = 90) -> Dict[str, int]:
    """
    Anonymize tracking data older than the specified retention period.

    Replaces the subscriber email with the placeholder 'anonymized' on
    newsletter send records (selected by sent_at) and link click records
    (selected by created_at) older than the cutoff, and stamps them with
    anonymized_at. All other fields are left untouched, so counts per
    newsletter or article remain available.

    Args:
        retention_days: Number of days to retain personal data (default: 90).
            Must not be negative.

    Returns:
        dict: Count of anonymized records for each collection:
            - newsletter_sends_anonymized: Number of newsletter send records anonymized
            - link_clicks_anonymized: Number of link click records anonymized
            - total_anonymized: Total number of records anonymized

    Raises:
        ValueError: If retention_days is negative.
    """
    # A negative value would put the cutoff in the future and irreversibly
    # anonymize every record, so refuse it before touching the database.
    if retention_days < 0:
        raise ValueError('retention_days must not be negative')

    now = datetime.utcnow()
    cutoff_date = now - timedelta(days=retention_days)

    # Same replacement for both collections, stamped with one timestamp.
    anonymize_update = {
        '$set': {
            'subscriber_email': 'anonymized',
            'anonymized_at': now
        }
    }

    # Anonymize newsletter_sends records
    newsletter_result = newsletter_sends_collection.update_many(
        {
            'sent_at': {'$lt': cutoff_date},
            'subscriber_email': {'$ne': 'anonymized'}  # Don't re-anonymize
        },
        anonymize_update
    )

    # Anonymize link_clicks records
    link_clicks_result = link_clicks_collection.update_many(
        {
            'created_at': {'$lt': cutoff_date},
            'subscriber_email': {'$ne': 'anonymized'}  # Don't re-anonymize
        },
        anonymize_update
    )

    newsletter_count = newsletter_result.modified_count
    link_clicks_count = link_clicks_result.modified_count
    return {
        'newsletter_sends_anonymized': newsletter_count,
        'link_clicks_anonymized': link_clicks_count,
        'total_anonymized': newsletter_count + link_clicks_count
    }
def delete_subscriber_tracking_data(subscriber_email: str) -> Dict[str, int]:
    """
    Remove every tracking record stored for one subscriber.

    Deletes matching documents from the newsletter sends, link clicks and
    subscriber activity collections, in that order. Intended to support
    GDPR right-to-be-forgotten requests.

    Args:
        subscriber_email: Email address of the subscriber

    Returns:
        dict: Count of deleted records for each collection:
            - newsletter_sends_deleted: Number of newsletter send records deleted
            - link_clicks_deleted: Number of link click records deleted
            - subscriber_activity_deleted: Number of activity records deleted
            - total_deleted: Sum of the three counts above
    """
    sends_deleted = newsletter_sends_collection.delete_many(
        {'subscriber_email': subscriber_email}
    ).deleted_count

    clicks_deleted = link_clicks_collection.delete_many(
        {'subscriber_email': subscriber_email}
    ).deleted_count

    # The activity collection keys its records by 'email'.
    activity_deleted = subscriber_activity_collection.delete_many(
        {'email': subscriber_email}
    ).deleted_count

    summary = {
        'newsletter_sends_deleted': sends_deleted,
        'link_clicks_deleted': clicks_deleted,
        'subscriber_activity_deleted': activity_deleted
    }
    summary['total_deleted'] = sum(summary.values())
    return summary