Files
Munich-news/news_sender/sender_service.py
2025-11-11 14:09:21 +01:00

411 lines
14 KiB
Python

#!/usr/bin/env python
"""
News Sender Service - Standalone microservice for sending newsletters
Fetches articles from MongoDB and sends to subscribers via email
"""
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from datetime import datetime
from pathlib import Path
from jinja2 import Template
from pymongo import MongoClient
import os
import sys
from dotenv import load_dotenv
# Make the sibling ``backend`` directory importable so the tracking modules
# below can be resolved when this service runs as a standalone script.
backend_dir = Path(__file__).parent.parent / 'backend'
sys.path.insert(0, str(backend_dir))

# Tracking helpers that live in the backend code base.
# NOTE(review): these imports execute before the .env file is loaded below; if
# the backend modules read environment variables at import time they will not
# see the .env values -- confirm against the backend code.
from services import tracking_service
from tracking_integration import (
    generate_tracking_urls,
    inject_tracking_pixel,
    replace_article_links,
)

# Pull configuration from backend/.env when that file is present.
env_path = backend_dir / '.env'
if not env_path.exists():
    print(f"⚠ Warning: .env file not found at {env_path}")
else:
    load_dotenv(dotenv_path=env_path)
    print(f"✓ Loaded configuration from: {env_path}")
class Config:
    """
    Settings for the news sender, read once from the process environment.

    Every value is resolved when the class body executes (i.e. at import
    time); each environment variable has a built-in default. Integer settings
    are parsed with ``int()``, so a non-numeric value raises ``ValueError``
    at import.
    """

    # --- MongoDB -----------------------------------------------------------
    MONGODB_URI = os.environ.get('MONGODB_URI', 'mongodb://localhost:27017/')
    DB_NAME = 'munich_news'

    # --- Outgoing mail (SMTP) ----------------------------------------------
    SMTP_SERVER = os.environ.get('SMTP_SERVER', 'smtp.gmail.com')
    SMTP_PORT = int(os.environ.get('SMTP_PORT', '587'))
    EMAIL_USER = os.environ.get('EMAIL_USER', '')
    EMAIL_PASSWORD = os.environ.get('EMAIL_PASSWORD', '')

    # --- Newsletter content ------------------------------------------------
    MAX_ARTICLES = int(os.environ.get('NEWSLETTER_MAX_ARTICLES', '10'))
    HOURS_LOOKBACK = int(os.environ.get('NEWSLETTER_HOURS_LOOKBACK', '24'))
    WEBSITE_URL = os.environ.get('WEBSITE_URL', 'http://localhost:3000')

    # --- Open/click tracking -----------------------------------------------
    # Only the literal string "true" (any letter case) enables tracking.
    TRACKING_ENABLED = os.environ.get('TRACKING_ENABLED', 'true').lower() == 'true'
    TRACKING_API_URL = os.environ.get('TRACKING_API_URL', 'http://localhost:5001')
    TRACKING_DATA_RETENTION_DAYS = int(
        os.environ.get('TRACKING_DATA_RETENTION_DAYS', '90')
    )
# Module-wide MongoDB handles, created at import time and shared by the query
# helpers in this file.
client = MongoClient(Config.MONGODB_URI)
db = client[Config.DB_NAME]
articles_collection, subscribers_collection = db['articles'], db['subscribers']
def get_latest_articles(max_articles=10, hours=24):
    """
    Get today's articles that have an AI summary, newest ``created_at`` first.

    An article qualifies when its ``summary`` exists and is not null, and:
      * ``published_at`` is a date on or after 00:00 UTC today, or
      * ``published_at`` is missing / not a date and ``created_at`` is on or
        after 00:00 UTC today (fallback).

    The fallback rule is part of the database query itself, so the
    ``max_articles`` limit is applied only to qualifying documents. Previously
    documents created today but published earlier matched the query, consumed
    part of the limit and were then discarded, which could return fewer
    articles than were actually available.

    Args:
        max_articles: Maximum number of articles to return (passed to the
            cursor's ``limit``).
        hours: Accepted for backward compatibility; currently unused because
            the window is always "since midnight UTC today".

    Returns:
        list: Dicts with the keys ``title``, ``author``, ``link``,
        ``summary``, ``source`` and ``published_at``.
    """
    from datetime import timezone

    # Start of today in UTC. The value is deliberately kept naive (tzinfo
    # stripped), exactly like the former ``datetime.utcnow()`` result, so it
    # compares against stored datetimes the same way as before.
    today_start = datetime.now(timezone.utc).replace(
        tzinfo=None, hour=0, minute=0, second=0, microsecond=0
    )

    query = {
        'summary': {'$exists': True, '$ne': None},
        '$or': [
            # Published today.
            {'published_at': {'$gte': today_start}},
            # No usable publication date: fall back to the creation date.
            {
                'published_at': {'$not': {'$type': 'date'}},
                'created_at': {'$gte': today_start},
            },
        ],
    }
    cursor = articles_collection.find(query).sort('created_at', -1).limit(max_articles)

    articles = []
    for doc in cursor:
        published_at = doc.get('published_at')
        created_at = doc.get('created_at')

        # Defensive re-check mirroring the query: published_at wins when it is
        # a datetime, otherwise created_at is used; documents with neither are
        # kept as-is.
        if isinstance(published_at, datetime):
            reference = published_at
        elif isinstance(created_at, datetime):
            reference = created_at
        else:
            reference = None
        if reference is not None and reference < today_start:
            continue

        articles.append({
            'title': doc.get('title', ''),
            'author': doc.get('author'),
            'link': doc.get('link', ''),
            'summary': doc.get('summary', ''),
            'source': doc.get('source', ''),
            'published_at': doc.get('published_at', ''),
        })
    return articles
def get_active_subscribers():
    """
    Collect the e-mail address of every subscriber whose status is 'active'.

    Returns:
        list: The ``email`` value of each matching subscriber document, in
        cursor order. A matching document without an ``email`` key raises
        ``KeyError``.
    """
    emails = []
    for subscriber in subscribers_collection.find({'status': 'active'}):
        emails.append(subscriber['email'])
    return emails
def render_newsletter_html(articles, tracking_enabled=False, pixel_tracking_id=None,
                           link_tracking_map=None, api_url=None):
    """
    Build the newsletter HTML from ``newsletter_template.html``.

    The template is read from the directory containing this file and rendered
    with the current local date, the article list and the website links.
    Tracking is applied afterwards, and only when ``tracking_enabled``,
    ``pixel_tracking_id`` and ``api_url`` are all truthy.

    Args:
        articles: List of article dictionaries handed to the template
        tracking_enabled: Whether to inject the tracking pixel and rewrite links
        pixel_tracking_id: Tracking ID used for the email-open pixel
        link_tracking_map: Mapping of original URLs to tracking IDs; links are
            only rewritten when this is non-empty
        api_url: Base URL of the tracking API

    Returns:
        str: The rendered HTML, with tracking injected when applicable
    """
    # NOTE(review): Template() is built without autoescape and the article
    # fields originate from crawled content -- confirm the template escapes
    # them itself.
    template_file = Path(__file__).parent / 'newsletter_template.html'
    template = Template(template_file.read_text(encoding='utf-8'))

    generated_at = datetime.now()
    html = template.render(
        date=generated_at.strftime('%A, %B %d, %Y'),
        year=generated_at.year,
        article_count=len(articles),
        articles=articles,
        unsubscribe_link=f'{Config.WEBSITE_URL}/unsubscribe',
        website_link=Config.WEBSITE_URL,
        tracking_enabled=tracking_enabled,
    )

    # Without a pixel ID and API URL there is nothing to inject.
    if not (tracking_enabled and pixel_tracking_id and api_url):
        return html

    html = inject_tracking_pixel(html, pixel_tracking_id, api_url)
    if link_tracking_map:
        html = replace_article_links(html, link_tracking_map, api_url)
    return html
def send_email(to_email, subject, html_content):
    """
    Send one multipart (plain text + HTML) email to a single recipient.

    The message is delivered through the SMTP server configured in ``Config``
    using STARTTLS and a login with ``EMAIL_USER`` / ``EMAIL_PASSWORD``.

    Args:
        to_email: Recipient email address
        subject: Email subject
        html_content: HTML content of email

    Returns:
        tuple: (success: bool, error: str or None). Any exception raised while
        building or sending the message is caught and reported as
        ``(False, str(exception))`` so one bad recipient does not abort a
        batch.
    """
    # Imported locally so the fix does not depend on the file-level imports.
    from email.utils import formatdate, make_msgid

    try:
        msg = MIMEMultipart('alternative')
        msg['Subject'] = subject
        msg['From'] = f'Munich News Daily <{Config.EMAIL_USER}>'
        msg['To'] = to_email
        # formatdate() always emits a numeric UTC offset and English day/month
        # names. The former strftime('... %z') on a naive datetime produced an
        # empty zone and locale-dependent names.
        msg['Date'] = formatdate(localtime=True)
        # make_msgid() yields a unique, well-formed "<id@domain>". The former
        # ID embedded the recipient address and therefore contained two "@".
        msg['Message-ID'] = make_msgid(domain='dongho.kim')
        msg['X-Mailer'] = 'Munich News Daily Sender'

        # Plain-text part first: in multipart/alternative the last part is the
        # preferred one, so HTML-capable clients show the HTML version.
        plain_text = "This email requires HTML support. Please view it in an HTML-capable email client."
        msg.attach(MIMEText(plain_text, 'plain', 'utf-8'))
        msg.attach(MIMEText(html_content, 'html', 'utf-8'))

        # The context manager closes the connection even when starttls(),
        # login() or send_message() raises; previously quit() was skipped on
        # any failure and the socket leaked.
        with smtplib.SMTP(Config.SMTP_SERVER, Config.SMTP_PORT) as server:
            server.starttls()
            server.login(Config.EMAIL_USER, Config.EMAIL_PASSWORD)
            server.send_message(msg)
        return True, None
    except Exception as e:
        return False, str(e)
def send_newsletter(max_articles=None, test_email=None):
    """
    Render today's newsletter and email it to each recipient, one by one.

    Progress is printed to stdout. When tracking is enabled in ``Config``,
    per-recipient tracking URLs are generated and injected; if that step
    raises, the recipient still receives an untracked newsletter.

    Args:
        max_articles: Maximum number of articles to include (falls back to
            ``Config.MAX_ARTICLES`` when falsy)
        test_email: If provided, send only to this address instead of the
            active subscribers (for testing)

    Returns:
        dict: ``{'success': False, 'error': ...}`` when credentials, articles
        or subscribers are missing; otherwise ``success`` is True together
        with ``sent_count``, ``failed_count``, ``total_subscribers``,
        ``article_count`` and ``errors`` (a list of ``{'email', 'error'}``).
    """
    banner = "=" * 70
    print("\n" + banner)
    print("📧 Munich News Daily - Newsletter Sender")
    print(banner)

    # Guard: sending is impossible without SMTP credentials.
    if not (Config.EMAIL_USER and Config.EMAIL_PASSWORD):
        print("❌ Email credentials not configured")
        print(" Set EMAIL_USER and EMAIL_PASSWORD in .env file")
        return {
            'success': False,
            'error': 'Email credentials not configured'
        }

    # Collect today's articles.
    max_articles = max_articles or Config.MAX_ARTICLES
    today_date = datetime.now().strftime('%B %d, %Y')
    print(f"\nFetching articles published TODAY ({today_date})...")
    print(f" Max articles: {max_articles}")
    articles = get_latest_articles(max_articles, hours=Config.HOURS_LOOKBACK)
    if not articles:
        print("❌ No articles from today with summaries found")
        print(f" No articles published today ({today_date})")
        print(" Run the crawler with Ollama enabled to get fresh content")
        return {
            'success': False,
            'error': 'No articles published today'
        }
    print(f"✓ Found {len(articles)} recent article(s)")

    # Work out who receives the newsletter.
    if not test_email:
        print("\nFetching active subscribers...")
        subscribers = get_active_subscribers()
        print(f"✓ Found {len(subscribers)} active subscriber(s)")
    else:
        subscribers = [test_email]
        print(f"\n🧪 Test mode: Sending to {test_email} only")
    if not subscribers:
        print("❌ No active subscribers found")
        return {
            'success': False,
            'error': 'No active subscribers'
        }

    # The newsletter ID is date-based, so all sends on one day share it.
    newsletter_id = f"newsletter-{datetime.now().strftime('%Y-%m-%d')}"
    subject = f"Munich News Daily - {datetime.now().strftime('%B %d, %Y')}"
    print(f"\nSending newsletter: '{subject}'")
    print(f"Newsletter ID: {newsletter_id}")
    print(f"Tracking enabled: {Config.TRACKING_ENABLED}")
    print("-" * 70)

    sent_count = 0
    failed_count = 0
    errors = []
    for position, recipient in enumerate(subscribers, start=1):
        print(f"[{position}/{len(subscribers)}] Sending to {recipient}...", end=' ')

        # Try the tracked rendering first; any failure downgrades this
        # recipient to the plain rendering below.
        use_tracking = Config.TRACKING_ENABLED
        if use_tracking:
            try:
                tracking_data = generate_tracking_urls(
                    articles=articles,
                    newsletter_id=newsletter_id,
                    subscriber_email=recipient,
                    tracking_service=tracking_service
                )
                html_content = render_newsletter_html(
                    articles=articles,
                    tracking_enabled=True,
                    pixel_tracking_id=tracking_data['pixel_tracking_id'],
                    link_tracking_map=tracking_data['link_tracking_map'],
                    api_url=Config.TRACKING_API_URL
                )
            except Exception as exc:
                print(f"⚠ Tracking error: {exc}, sending without tracking...", end=' ')
                use_tracking = False
        if not use_tracking:
            html_content = render_newsletter_html(articles)

        delivered, reason = send_email(recipient, subject, html_content)
        if not delivered:
            print(f"{reason}")
            failed_count += 1
            errors.append({'email': recipient, 'error': reason})
            continue
        print("")
        sent_count += 1

    # Final report.
    print("\n" + banner)
    print("📊 Sending Complete")
    print(banner)
    print(f"✓ Successfully sent: {sent_count}")
    print(f"✗ Failed: {failed_count}")
    print(f"📰 Articles included: {len(articles)}")
    print(banner + "\n")
    return {
        'success': True,
        'sent_count': sent_count,
        'failed_count': failed_count,
        'total_subscribers': len(subscribers),
        'article_count': len(articles),
        'errors': errors
    }
def preview_newsletter(max_articles=None, hours=None):
    """
    Produce the newsletter HTML without sending anything.

    Args:
        max_articles: Maximum number of articles to include (falls back to
            ``Config.MAX_ARTICLES`` when falsy)
        hours: Hours to look back (falls back to ``Config.HOURS_LOOKBACK``
            when falsy)

    Returns:
        str: The untracked newsletter HTML, or a short HTML notice when no
        articles are available
    """
    article_limit = max_articles if max_articles else Config.MAX_ARTICLES
    lookback = hours if hours else Config.HOURS_LOOKBACK

    articles = get_latest_articles(article_limit, hours=lookback)
    if articles:
        # Previews never carry tracking pixels or rewritten links.
        return render_newsletter_html(articles, tracking_enabled=False)

    today_date = datetime.now().strftime('%B %d, %Y')
    return f"<h1>No articles from today found</h1><p>No articles published today ({today_date}). Run the crawler with Ollama enabled to get fresh content.</p>"
if __name__ == '__main__':
    # Command-line entry point:
    #   (no arguments)   send the newsletter to all active subscribers
    #   preview          write the rendered HTML to newsletter_preview.html
    #   test <email>     send the newsletter to a single address
    #   send [count]     send to all subscribers, optionally capping articles
    cli_args = sys.argv[1:]

    if not cli_args:
        send_newsletter()
    elif cli_args[0] == 'preview':
        output_file = 'newsletter_preview.html'
        preview_html = preview_newsletter()
        with open(output_file, 'w', encoding='utf-8') as preview_fh:
            preview_fh.write(preview_html)
        print(f"✓ Preview saved to {output_file}")
        print(f" Open it in your browser to see the newsletter")
    elif cli_args[0] == 'test':
        # The recipient address is mandatory for a test send.
        if len(cli_args) < 2:
            print("Usage: python sender_service.py test <email>")
            sys.exit(1)
        send_newsletter(test_email=cli_args[1])
    elif cli_args[0] == 'send':
        article_cap = int(cli_args[1]) if len(cli_args) > 1 else None
        send_newsletter(max_articles=article_cap)
    else:
        print("Unknown command. Usage:")
        print(" python sender_service.py preview - Generate HTML preview")
        print(" python sender_service.py test <email> - Send test email")
        print(" python sender_service.py send [count] - Send to all subscribers")