Files
Munich-news/news_sender/tracking_integration.py
2025-11-11 14:09:21 +01:00

151 lines
5.4 KiB
Python

"""
Tracking integration module for Munich News Daily newsletter system.
Handles injection of tracking pixels and replacement of article links with tracking URLs.
"""
import re
from typing import Dict, List
from bs4 import BeautifulSoup
# Matches a closing body tag regardless of case or whitespace before '>'.
_BODY_CLOSE_RE = re.compile(r'</body\s*>', re.IGNORECASE)


def inject_tracking_pixel(html: str, tracking_id: str, api_url: str) -> str:
    """
    Inject a tracking pixel into newsletter HTML before the closing </body> tag.

    The tracking pixel is a 1x1 image whose URL points at the tracking API;
    it is requested when the email client renders the message, which lets the
    API record an open.

    The pixel is inserted exactly once, immediately before the *last* closing
    body tag (matched case-insensitively). If the HTML has no closing body
    tag, the pixel is appended to the end instead.

    Args:
        html: Original newsletter HTML content
        tracking_id: Unique tracking ID for this newsletter send
            (None or empty if tracking is disabled)
        api_url: Base URL for the tracking API (e.g., http://localhost:5001);
            a trailing slash is tolerated

    Returns:
        str: HTML with tracking pixel injected (unchanged if tracking_id is
        None or empty)

    Example:
        >>> html = '<html><body><p>Content</p></body></html>'
        >>> inject_tracking_pixel(html, 'abc-123', 'http://api.example.com')
        '<html><body><p>Content</p><img src="http://api.example.com/api/track/pixel/abc-123" width="1" height="1" alt="" style="display:block;" /></body></html>'
    """
    # Skip tracking if no tracking_id provided (subscriber opted out)
    if not tracking_id:
        return html

    # Normalise the base URL so "http://host/" does not produce "//api/...".
    base_url = api_url.rstrip('/')
    pixel_url = f"{base_url}/api/track/pixel/{tracking_id}"
    pixel_html = (
        f'<img src="{pixel_url}" width="1" height="1" alt="" '
        f'style="display:block;" />'
    )

    # Locate the last closing body tag; inserting at a single position avoids
    # duplicating the pixel when the markup contains more than one "</body>".
    last_close = None
    for last_close in _BODY_CLOSE_RE.finditer(html):
        pass

    if last_close is None:
        # Fallback: append to end if no closing body tag found
        return html + pixel_html

    cut = last_close.start()
    return f'{html[:cut]}{pixel_html}{html[cut:]}'
def replace_article_links(
    html: str,
    link_tracking_map: Dict[str, str],
    api_url: str
) -> str:
    """
    Replace article links in newsletter HTML with tracking URLs.

    Parses the HTML, and for every <a> tag whose href is a key of
    ``link_tracking_map`` rewrites the href to
    ``<api_url>/api/track/click/<tracking_id>``. Links that are not in the
    map are left pointing at their original target.

    Because the document is parsed and serialised again, the returned markup
    may differ cosmetically from the input even for untouched elements.

    Args:
        html: Original newsletter HTML content
        link_tracking_map: Dictionary mapping original URLs to tracking IDs
            (empty or None if tracking is disabled)
        api_url: Base URL for the tracking API (e.g., http://localhost:5001);
            a trailing slash is tolerated

    Returns:
        str: HTML with article links replaced by tracking URLs (the input is
        returned unchanged if the map is empty)

    Example:
        >>> html = '<a href="https://example.com/article">Read</a>'
        >>> mapping = {'https://example.com/article': 'track-123'}
        >>> replace_article_links(html, mapping, 'http://api.example.com')
        '<a href="http://api.example.com/api/track/click/track-123">Read</a>'
    """
    # Skip tracking if no tracking map provided (subscriber opted out)
    if not link_tracking_map:
        return html

    # Normalise the base URL so "http://host/" does not produce "//api/...".
    click_base = f"{api_url.rstrip('/')}/api/track/click"

    soup = BeautifulSoup(html, 'html.parser')
    for anchor in soup.find_all('a', href=True):
        href = anchor['href']
        # Prefer the exact href; fall back to the whitespace-trimmed value so
        # markup like href=" https://... " still matches its map entry.
        tracking_id = (
            link_tracking_map.get(href)
            or link_tracking_map.get(href.strip())
        )
        # A missing or empty tracking ID means "do not track this link";
        # leave the original href rather than emitting a broken URL.
        if tracking_id:
            anchor['href'] = f"{click_base}/{tracking_id}"

    return str(soup)
def generate_tracking_urls(
    articles: List[Dict],
    newsletter_id: str,
    subscriber_email: str,
    tracking_service
) -> Dict[str, object]:
    """
    Create tracking records for a newsletter send and return the tracking data.

    Collects the articles that have a non-empty 'link', passes them to
    ``tracking_service.create_newsletter_tracking`` together with the
    newsletter and subscriber identifiers, and returns the relevant fields of
    the service's response.

    Args:
        articles: List of article dictionaries with 'link' and 'title' keys;
            articles without a link are skipped, a missing title becomes ''
        newsletter_id: Unique identifier for the newsletter batch
        subscriber_email: Email address of the recipient
        tracking_service: Object exposing ``create_newsletter_tracking(
            newsletter_id=..., subscriber_email=..., article_links=...)``
            that returns a dict with 'pixel_tracking_id' and
            'link_tracking_map' (and optionally 'tracking_enabled')

    Returns:
        dict: Dictionary containing:
            - pixel_tracking_id: ID for the tracking pixel
            - link_tracking_map: Dict mapping original URLs to tracking IDs
            - tracking_enabled: Value reported by the service, defaulting to
              True when the service does not include the key

    Raises:
        KeyError: If the service response lacks 'pixel_tracking_id' or
            'link_tracking_map'.

    Example:
        >>> articles = [{'link': 'https://example.com/1', 'title': 'Article 1'}]
        >>> generate_tracking_urls(articles, 'news-2024-01-01', 'user@example.com', tracking_service)
        {
            'pixel_tracking_id': 'uuid-for-pixel',
            'link_tracking_map': {'https://example.com/1': 'uuid-for-link'},
            'tracking_enabled': True
        }
    """
    # Keep only articles that actually carry a link; `or []` tolerates None.
    article_links = [
        {'url': article['link'], 'title': article.get('title', '')}
        for article in (articles or [])
        if article.get('link')
    ]

    # Create tracking records using the tracking service
    tracking_data = tracking_service.create_newsletter_tracking(
        newsletter_id=newsletter_id,
        subscriber_email=subscriber_email,
        article_links=article_links
    )

    return {
        'pixel_tracking_id': tracking_data['pixel_tracking_id'],
        'link_tracking_map': tracking_data['link_tracking_map'],
        'tracking_enabled': tracking_data.get('tracking_enabled', True)
    }