Files
Munich-news/backend/services/analytics_service.py
2025-11-11 14:09:21 +01:00

307 lines
9.5 KiB
Python

"""
Analytics service for email tracking metrics and subscriber engagement.
Calculates open rates, click rates, and subscriber activity status.
"""
from datetime import datetime, timedelta
from typing import Dict, Optional
from database import (
newsletter_sends_collection,
link_clicks_collection,
subscriber_activity_collection
)
def get_open_rate(newsletter_id: str) -> float:
    """
    Return the share of a newsletter's sends that were opened.

    Args:
        newsletter_id: Unique identifier for the newsletter batch

    Returns:
        float: Open rate as a percentage, rounded to two decimals.
            0.0 when no sends are recorded for the newsletter.
    """
    batch_filter = {'newsletter_id': newsletter_id}

    # Denominator: every send record that belongs to this batch.
    sent = newsletter_sends_collection.count_documents(batch_filter)
    if sent == 0:
        # Nothing was sent, so skip the second query and avoid dividing by zero.
        return 0.0

    # Numerator: the subset of those records flagged as opened.
    opened = newsletter_sends_collection.count_documents(
        {**batch_filter, 'opened': True}
    )

    return round(opened / sent * 100, 2)
def get_click_rate(article_url: str) -> float:
    """
    Return the share of an article's tracked links that were clicked.

    Args:
        article_url: The original article URL

    Returns:
        float: Click rate as a percentage, rounded to two decimals.
            0.0 when no tracking records exist for the article.
    """
    article_filter = {'article_url': article_url}

    # Denominator: every link-tracking record created for this article.
    tracked = link_clicks_collection.count_documents(article_filter)
    if tracked == 0:
        # No tracking records: skip the second query and avoid dividing by zero.
        return 0.0

    # Numerator: tracking records flagged as clicked.
    clicked = link_clicks_collection.count_documents(
        {**article_filter, 'clicked': True}
    )

    return round(clicked / tracked * 100, 2)
def get_newsletter_metrics(newsletter_id: str) -> Dict:
    """
    Collect send, open and click figures for one newsletter batch.

    Args:
        newsletter_id: Unique identifier for the newsletter batch

    Returns:
        dict: Dictionary containing:
            - newsletter_id: The newsletter ID
            - total_sent: Number of send records for the newsletter
            - total_opened: Number of send records flagged as opened
            - open_rate: total_opened / total_sent as a percentage
            - total_clicks: Number of link records flagged as clicked
            - unique_clickers: Number of distinct subscriber emails with
              at least one clicked link
            - click_through_rate: unique_clickers / total_sent as a
              percentage
        Both rates are rounded to two decimals and are 0.0 when nothing
        was sent.
    """
    clicked_filter = {'newsletter_id': newsletter_id, 'clicked': True}

    # Send-side counts.
    sent = newsletter_sends_collection.count_documents(
        {'newsletter_id': newsletter_id}
    )
    opened = newsletter_sends_collection.count_documents(
        {'newsletter_id': newsletter_id, 'opened': True}
    )

    # Click-side counts: raw clicked records, then distinct subscribers.
    clicks = link_clicks_collection.count_documents(clicked_filter)
    clicker_emails = link_clicks_collection.distinct(
        'subscriber_email', clicked_filter
    )
    clickers = len(clicker_emails)

    # Both rates share the same denominator, so guard it once.
    if sent > 0:
        open_pct = opened / sent * 100
        click_through_pct = clickers / sent * 100
    else:
        open_pct = 0.0
        click_through_pct = 0.0

    return {
        'newsletter_id': newsletter_id,
        'total_sent': sent,
        'total_opened': opened,
        'open_rate': round(open_pct, 2),
        'total_clicks': clicks,
        'unique_clickers': clickers,
        'click_through_rate': round(click_through_pct, 2)
    }
def get_article_performance(article_url: str) -> Dict:
    """
    Collect click figures for one article across every newsletter.

    Args:
        article_url: The original article URL

    Returns:
        dict: Dictionary containing:
            - article_url: The article URL
            - total_sent: Number of link-tracking records for the article
            - total_clicks: Number of those records flagged as clicked
            - click_rate: total_clicks / total_sent as a percentage,
              rounded to two decimals (0.0 when total_sent is 0)
            - unique_clickers: Number of distinct subscriber emails with
              a clicked record for the article
            - newsletters: Distinct newsletter IDs found on the
              article's tracking records
    """
    any_record = {'article_url': article_url}
    clicked_record = {'article_url': article_url, 'clicked': True}

    # Raw volumes: all tracking records, then only the clicked ones.
    sent = link_clicks_collection.count_documents(any_record)
    clicks = link_clicks_collection.count_documents(clicked_record)

    if sent > 0:
        rate = clicks / sent * 100
    else:
        rate = 0.0

    # Distinct subscribers who clicked, and distinct newsletters that
    # carried the article (clicked or not).
    clicker_emails = link_clicks_collection.distinct(
        'subscriber_email', clicked_record
    )
    carrying_newsletters = link_clicks_collection.distinct(
        'newsletter_id', any_record
    )

    return {
        'article_url': article_url,
        'total_sent': sent,
        'total_clicks': clicks,
        'click_rate': round(rate, 2),
        'unique_clickers': len(clicker_emails),
        'newsletters': carrying_newsletters
    }
def get_subscriber_activity_status(email: str) -> str:
    """
    Get the activity status for a specific subscriber.

    Classifies the subscriber by the number of whole days elapsed since
    their most recent recorded email open:
        - 'active': 30 whole days or fewer
        - 'inactive': 31 to 60 whole days
        - 'dormant': more than 60 whole days
        - 'new': no opened send on record, or the most recent one
          carries no 'last_opened_at' timestamp

    Args:
        email: Subscriber email address

    Returns:
        str: Activity status ('active', 'inactive', 'dormant', or 'new')
    """
    # Descending sort on last_opened_at puts the newest open first.
    most_recent_open = newsletter_sends_collection.find_one(
        {'subscriber_email': email, 'opened': True},
        sort=[('last_opened_at', -1)]
    )

    # A subscriber without a recorded open is 'new' whether or not they
    # have received anything, so no further lookup is needed here.
    if not most_recent_open:
        return 'new'

    last_opened_at = most_recent_open.get('last_opened_at')
    if not last_opened_at:
        return 'new'

    # Compare like with like: subtracting an aware timestamp from a naive
    # "now" raises TypeError. Naive timestamps are treated as UTC.
    if last_opened_at.utcoffset() is not None:
        now = datetime.now(last_opened_at.tzinfo)
    else:
        now = datetime.utcnow()

    days_since_open = (now - last_opened_at).days

    # Classify based on whole days since the last open.
    if days_since_open <= 30:
        return 'active'
    if days_since_open <= 60:
        return 'inactive'
    return 'dormant'
def update_subscriber_activity_statuses() -> int:
    """
    Refresh the subscriber_activity record of every known subscriber.

    Walks every distinct subscriber email found in the newsletter sends
    collection, recomputes that subscriber's status, last open/click
    timestamps and engagement counters, and upserts the result into the
    subscriber_activity collection keyed by email.

    Returns:
        int: Number of subscriber records written (one per distinct email)
    """
    processed = 0

    for address in newsletter_sends_collection.distinct('subscriber_email'):
        opened_filter = {'subscriber_email': address, 'opened': True}
        clicked_filter = {'subscriber_email': address, 'clicked': True}

        activity_status = get_subscriber_activity_status(address)

        # Newest open and newest click; either may be absent.
        latest_open = newsletter_sends_collection.find_one(
            opened_filter, sort=[('last_opened_at', -1)]
        )
        if latest_open:
            opened_at = latest_open.get('last_opened_at')
        else:
            opened_at = None

        latest_click = link_clicks_collection.find_one(
            clicked_filter, sort=[('clicked_at', -1)]
        )
        if latest_click:
            clicked_at = latest_click.get('clicked_at')
        else:
            clicked_at = None

        # Engagement counters.
        open_total = newsletter_sends_collection.count_documents(opened_filter)
        click_total = link_clicks_collection.count_documents(clicked_filter)
        received_total = newsletter_sends_collection.count_documents(
            {'subscriber_email': address}
        )
        opened_newsletter_ids = newsletter_sends_collection.distinct(
            'newsletter_id', opened_filter
        )

        snapshot = {
            'email': address,
            'status': activity_status,
            'last_opened_at': opened_at,
            'last_clicked_at': clicked_at,
            'total_opens': open_total,
            'total_clicks': click_total,
            'newsletters_received': received_total,
            'newsletters_opened': len(opened_newsletter_ids),
            'updated_at': datetime.utcnow()
        }

        # Upsert so first-time subscribers get a record created.
        subscriber_activity_collection.update_one(
            {'email': address},
            {'$set': snapshot},
            upsert=True
        )
        processed += 1

    return processed