update
This commit is contained in:
306
backend/services/analytics_service.py
Normal file
306
backend/services/analytics_service.py
Normal file
@@ -0,0 +1,306 @@
|
||||
"""
|
||||
Analytics service for email tracking metrics and subscriber engagement.
|
||||
Calculates open rates, click rates, and subscriber activity status.
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, Optional
|
||||
from database import (
|
||||
newsletter_sends_collection,
|
||||
link_clicks_collection,
|
||||
subscriber_activity_collection
|
||||
)
|
||||
|
||||
|
||||
def get_open_rate(newsletter_id: str) -> float:
    """
    Return the share of a newsletter's send records that are marked opened.

    Args:
        newsletter_id: Unique identifier for the newsletter batch

    Returns:
        float: Open rate as a percentage (0-100), rounded to two decimals;
            0.0 when no send records exist for the newsletter
    """
    send_filter = {'newsletter_id': newsletter_id}

    sent = newsletter_sends_collection.count_documents(send_filter)
    if sent == 0:
        # Nothing was sent: report 0.0 instead of dividing by zero.
        return 0.0

    # Same filter, narrowed to the records flagged as opened.
    opened = newsletter_sends_collection.count_documents(
        {**send_filter, 'opened': True}
    )

    return round((opened / sent) * 100, 2)
|
||||
|
||||
|
||||
def get_click_rate(article_url: str) -> float:
    """
    Return the share of an article's link-tracking records that are marked clicked.

    Args:
        article_url: The original article URL

    Returns:
        float: Click rate as a percentage (0-100), rounded to two decimals;
            0.0 when no tracking records exist for the URL
    """
    link_filter = {'article_url': article_url}

    tracked = link_clicks_collection.count_documents(link_filter)
    if tracked == 0:
        # No tracking records for this URL: report 0.0 instead of dividing by zero.
        return 0.0

    # Same filter, narrowed to the records flagged as clicked.
    clicked = link_clicks_collection.count_documents(
        {**link_filter, 'clicked': True}
    )

    return round((clicked / tracked) * 100, 2)
|
||||
|
||||
|
||||
def get_newsletter_metrics(newsletter_id: str) -> Dict:
    """
    Collect send, open and click metrics for one newsletter.

    Args:
        newsletter_id: Unique identifier for the newsletter batch

    Returns:
        dict: Dictionary containing:
            - newsletter_id: The newsletter ID
            - total_sent: Number of send records for the newsletter
            - total_opened: Number of send records marked opened
            - open_rate: total_opened / total_sent as a percentage (2 decimals)
            - total_clicks: Number of link records marked clicked
            - unique_clickers: Number of distinct subscriber_email values
              among the clicked link records
            - click_through_rate: unique_clickers / total_sent as a
              percentage (2 decimals)
          Both rates are 0.0 when nothing was sent.
    """
    def _percentage(part, whole):
        # Guard against division by zero when the newsletter has no sends.
        return (part / whole * 100) if whole > 0 else 0.0

    sends_filter = {'newsletter_id': newsletter_id}
    clicked_filter = {'newsletter_id': newsletter_id, 'clicked': True}

    # Send-side counts.
    sent = newsletter_sends_collection.count_documents(sends_filter)
    opened = newsletter_sends_collection.count_documents(
        {**sends_filter, 'opened': True}
    )

    # Click-side counts.
    clicks = link_clicks_collection.count_documents(clicked_filter)
    # NOTE(review): distinct values are counted as-is, so records whose
    # subscriber_email was replaced by the shared 'anonymized' placeholder
    # collapse into a single clicker here - confirm that is acceptable.
    clicker_emails = link_clicks_collection.distinct(
        'subscriber_email',
        clicked_filter
    )
    clickers = len(clicker_emails)

    metrics = {
        'newsletter_id': newsletter_id,
        'total_sent': sent,
        'total_opened': opened,
        'open_rate': round(_percentage(opened, sent), 2),
        'total_clicks': clicks,
        'unique_clickers': clickers,
        'click_through_rate': round(_percentage(clickers, sent), 2)
    }
    return metrics
|
||||
|
||||
|
||||
def get_article_performance(article_url: str) -> Dict:
    """
    Collect click metrics for one article across every newsletter that linked it.

    Args:
        article_url: The original article URL

    Returns:
        dict: Dictionary containing:
            - article_url: The article URL
            - total_sent: Number of link-tracking records for the URL
            - total_clicks: Number of those records marked clicked
            - click_rate: total_clicks / total_sent as a percentage
              (2 decimals, 0.0 when there are no records)
            - unique_clickers: Number of distinct subscriber_email values
              among the clicked records
            - newsletters: Distinct newsletter IDs found on the URL's records
    """
    url_filter = {'article_url': article_url}
    clicked_filter = {'article_url': article_url, 'clicked': True}

    # One tracking record exists per (send, link), so the record count is
    # used as the number of times the article was sent.
    sent = link_clicks_collection.count_documents(url_filter)
    clicks = link_clicks_collection.count_documents(clicked_filter)

    if sent > 0:
        rate = clicks / sent * 100
    else:
        rate = 0.0

    clicker_emails = link_clicks_collection.distinct(
        'subscriber_email',
        clicked_filter
    )
    newsletter_ids = link_clicks_collection.distinct(
        'newsletter_id',
        url_filter
    )

    return {
        'article_url': article_url,
        'total_sent': sent,
        'total_clicks': clicks,
        'click_rate': round(rate, 2),
        'unique_clickers': len(clicker_emails),
        'newsletters': newsletter_ids
    }
|
||||
|
||||
|
||||
def get_subscriber_activity_status(email: str) -> str:
    """
    Get the activity status for a specific subscriber.

    Classifies the subscriber by the whole number of days elapsed since the
    most recent 'last_opened_at' on any of their opened send records:
    - 'active': last open 30 days ago or less
    - 'inactive': last open more than 30 and at most 60 days ago
    - 'dormant': last open more than 60 days ago
    - 'new': no opened send record (or none carrying a timestamp)

    Args:
        email: Subscriber email address

    Returns:
        str: Activity status ('active', 'inactive', 'dormant', or 'new')
    """
    # Most recent open for this subscriber, newest 'last_opened_at' first.
    most_recent_open = newsletter_sends_collection.find_one(
        {'subscriber_email': email, 'opened': True},
        sort=[('last_opened_at', -1)]
    )

    # A subscriber without any usable open timestamp is 'new' whether or not
    # they have received newsletters, so no extra "has received" query is
    # needed to decide the status.
    last_opened_at = (
        most_recent_open.get('last_opened_at') if most_recent_open else None
    )
    if not last_opened_at:
        return 'new'

    # NOTE(review): assumes 'last_opened_at' is a naive UTC datetime, matching
    # datetime.utcnow(); a timezone-aware value would raise TypeError here.
    days_since_open = (datetime.utcnow() - last_opened_at).days

    if days_since_open <= 30:
        return 'active'
    if days_since_open <= 60:
        return 'inactive'
    return 'dormant'
|
||||
|
||||
|
||||
def update_subscriber_activity_statuses() -> int:
    """
    Batch update activity statuses for all subscribers.

    For every distinct subscriber_email found in the newsletter sends
    collection, recomputes the activity status, open/click totals and last
    interaction timestamps, and upserts them into the subscriber_activity
    collection keyed by email.

    Missing/empty email values and the 'anonymized' placeholder (written by
    anonymize_old_tracking_data) are skipped: they do not identify a real
    subscriber, and processing them would create a bogus activity record
    aggregating unrelated people.

    Returns:
        int: Number of subscriber records updated
    """
    # Value that replaces subscriber_email once a tracking record is anonymized.
    anonymized_placeholder = 'anonymized'

    # Get all unique subscriber emails from newsletter sends
    all_subscribers = newsletter_sends_collection.distinct('subscriber_email')

    updated_count = 0

    for email in all_subscribers:
        if not email or email == anonymized_placeholder:
            continue

        opened_filter = {'subscriber_email': email, 'opened': True}
        clicked_filter = {'subscriber_email': email, 'clicked': True}

        # Get activity status
        status = get_subscriber_activity_status(email)

        # Get last opened timestamp (None when the subscriber never opened)
        last_open_record = newsletter_sends_collection.find_one(
            opened_filter,
            sort=[('last_opened_at', -1)]
        )
        last_opened_at = last_open_record.get('last_opened_at') if last_open_record else None

        # Get last clicked timestamp (None when the subscriber never clicked)
        last_click_record = link_clicks_collection.find_one(
            clicked_filter,
            sort=[('clicked_at', -1)]
        )
        last_clicked_at = last_click_record.get('clicked_at') if last_click_record else None

        # Count opened send records and clicked link records
        total_opens = newsletter_sends_collection.count_documents(opened_filter)
        total_clicks = link_clicks_collection.count_documents(clicked_filter)

        # Count newsletters received (one send record per delivery)
        newsletters_received = newsletter_sends_collection.count_documents({
            'subscriber_email': email
        })

        # Count newsletters opened (distinct newsletter_ids)
        newsletters_opened = len(newsletter_sends_collection.distinct(
            'newsletter_id',
            opened_filter
        ))

        # Update or insert subscriber activity record
        subscriber_activity_collection.update_one(
            {'email': email},
            {
                '$set': {
                    'email': email,
                    'status': status,
                    'last_opened_at': last_opened_at,
                    'last_clicked_at': last_clicked_at,
                    'total_opens': total_opens,
                    'total_clicks': total_clicks,
                    'newsletters_received': newsletters_received,
                    'newsletters_opened': newsletters_opened,
                    'updated_at': datetime.utcnow()
                }
            },
            upsert=True
        )

        updated_count += 1

    return updated_count
|
||||
215
backend/services/tracking_service.py
Normal file
215
backend/services/tracking_service.py
Normal file
@@ -0,0 +1,215 @@
|
||||
"""
|
||||
Email tracking service for Munich News Daily newsletter system.
|
||||
Handles tracking ID generation and tracking record creation.
|
||||
"""
|
||||
|
||||
import uuid
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional

from database import newsletter_sends_collection, link_clicks_collection, subscriber_activity_collection, subscribers_collection
|
||||
|
||||
|
||||
def generate_tracking_id() -> str:
    """
    Create a fresh tracking identifier.

    Returns:
        str: The string form of a newly generated random UUID (version 4)
    """
    tracking_uuid = uuid.uuid4()
    return str(tracking_uuid)
|
||||
|
||||
|
||||
def create_newsletter_tracking(
    newsletter_id: str,
    subscriber_email: str,
    article_links: Optional[List[Dict[str, str]]] = None
) -> Dict[str, Any]:
    """
    Create tracking records for a newsletter send.

    Inserts one record into the newsletter_sends collection (used for open
    tracking) and one record into the link_clicks collection per distinct
    article URL. Nothing is written when the subscriber's record has
    'tracking_enabled' set to a falsy value; subscribers without a record or
    without that flag are tracked.

    Args:
        newsletter_id: Unique identifier for the newsletter batch (e.g., date-based)
        subscriber_email: Email address of the recipient
        article_links: Optional list of article dictionaries with 'url' and
            'title' keys. Entries without a URL are ignored; a URL that occurs
            more than once is tracked once.

    Returns:
        dict: Tracking information containing:
            - pixel_tracking_id: ID for the tracking pixel (None if opted out)
            - link_tracking_map: Dict mapping original URLs to tracking IDs (empty if opted out)
            - newsletter_id: The newsletter batch ID
            - subscriber_email: The recipient email
            - tracking_enabled: Boolean indicating if tracking is enabled for this subscriber
    """
    # Check if subscriber has opted out of tracking (default: enabled)
    subscriber = subscribers_collection.find_one({'email': subscriber_email})
    tracking_enabled = subscriber.get('tracking_enabled', True) if subscriber else True

    # If tracking is disabled, return empty tracking data and store nothing
    if not tracking_enabled:
        return {
            'pixel_tracking_id': None,
            'link_tracking_map': {},
            'newsletter_id': newsletter_id,
            'subscriber_email': subscriber_email,
            'tracking_enabled': False
        }

    # One timestamp for every record of this send, so 'sent_at' and the
    # 'created_at' fields agree with each other.
    now = datetime.utcnow()

    # Generate tracking ID for the email open pixel
    pixel_tracking_id = generate_tracking_id()

    # Create newsletter send tracking record
    newsletter_sends_collection.insert_one({
        'newsletter_id': newsletter_id,
        'subscriber_email': subscriber_email,
        'tracking_id': pixel_tracking_id,
        'sent_at': now,
        'opened': False,
        'first_opened_at': None,
        'last_opened_at': None,
        'open_count': 0,
        'created_at': now
    })

    # Create tracking records for article links
    link_tracking_map: Dict[str, str] = {}

    for article in article_links or []:
        article_url = article.get('url')

        # Skip entries without a URL. Also skip repeated URLs: the map can
        # hold only one tracking ID per URL, so a second record would never
        # be reachable from the email yet would still be counted as "sent".
        if not article_url or article_url in link_tracking_map:
            continue

        link_tracking_id = generate_tracking_id()

        # Create link click tracking record
        link_clicks_collection.insert_one({
            'tracking_id': link_tracking_id,
            'newsletter_id': newsletter_id,
            'subscriber_email': subscriber_email,
            'article_url': article_url,
            'article_title': article.get('title', ''),
            'clicked': False,
            'clicked_at': None,
            'user_agent': None,
            'created_at': now
        })

        # Map original URL to tracking ID
        link_tracking_map[article_url] = link_tracking_id

    return {
        'pixel_tracking_id': pixel_tracking_id,
        'link_tracking_map': link_tracking_map,
        'newsletter_id': newsletter_id,
        'subscriber_email': subscriber_email,
        'tracking_enabled': True
    }
|
||||
|
||||
|
||||
|
||||
def anonymize_old_tracking_data(retention_days: int = 90) -> Dict[str, int]:
    """
    Replace subscriber emails on tracking records older than the retention period.

    Newsletter send records are aged by 'sent_at' and link click records by
    'created_at'. Matching records get 'subscriber_email' overwritten with the
    literal 'anonymized' and an 'anonymized_at' timestamp; all other fields
    are left as they are. Records already carrying the placeholder are not
    touched again.

    Args:
        retention_days: Number of days to retain personal data (default: 90)

    Returns:
        dict: Count of anonymized records for each collection:
            - newsletter_sends_anonymized: Number of newsletter send records anonymized
            - link_clicks_anonymized: Number of link click records anonymized
            - total_anonymized: Total number of records anonymized
    """
    cutoff_date = datetime.utcnow() - timedelta(days=retention_days)

    def _anonymize(collection, age_field):
        # Overwrite the email on every not-yet-anonymized record older than the cutoff.
        return collection.update_many(
            {
                age_field: {'$lt': cutoff_date},
                'subscriber_email': {'$ne': 'anonymized'}  # Don't re-anonymize
            },
            {
                '$set': {
                    'subscriber_email': 'anonymized',
                    'anonymized_at': datetime.utcnow()
                }
            }
        )

    sends_outcome = _anonymize(newsletter_sends_collection, 'sent_at')
    clicks_outcome = _anonymize(link_clicks_collection, 'created_at')

    sends_changed = sends_outcome.modified_count
    clicks_changed = clicks_outcome.modified_count

    return {
        'newsletter_sends_anonymized': sends_changed,
        'link_clicks_anonymized': clicks_changed,
        'total_anonymized': sends_changed + clicks_changed
    }
|
||||
|
||||
|
||||
def delete_subscriber_tracking_data(subscriber_email: str) -> Dict[str, int]:
    """
    Remove every tracking record stored for one subscriber.

    Deletes the matching documents from the newsletter_sends and link_clicks
    collections (matched on 'subscriber_email') and from the
    subscriber_activity collection (matched on 'email'). Intended to support
    GDPR right-to-be-forgotten requests.

    Args:
        subscriber_email: Email address of the subscriber

    Returns:
        dict: Count of deleted records for each collection:
            - newsletter_sends_deleted: Number of newsletter send records deleted
            - link_clicks_deleted: Number of link click records deleted
            - subscriber_activity_deleted: Number of activity records deleted
            - total_deleted: Total number of records deleted
    """
    # Run the three deletions in order: sends, link clicks, activity summary.
    outcomes = (
        newsletter_sends_collection.delete_many({'subscriber_email': subscriber_email}),
        link_clicks_collection.delete_many({'subscriber_email': subscriber_email}),
        subscriber_activity_collection.delete_many({'email': subscriber_email}),
    )

    sends_deleted, clicks_deleted, activity_deleted = [
        outcome.deleted_count for outcome in outcomes
    ]

    return {
        'newsletter_sends_deleted': sends_deleted,
        'link_clicks_deleted': clicks_deleted,
        'subscriber_activity_deleted': activity_deleted,
        'total_deleted': sends_deleted + clicks_deleted + activity_deleted
    }
|
||||
Reference in New Issue
Block a user