#!/usr/bin/env python
"""
News Sender Service - Standalone microservice for sending newsletters
Fetches articles from MongoDB and sends to subscribers via email
"""

import os
import smtplib
import sys
from datetime import datetime, timezone
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.utils import formatdate, make_msgid
from pathlib import Path

from dotenv import load_dotenv
from jinja2 import Template
from pymongo import MongoClient

# Add backend directory to path for importing tracking service.
# Check if running in Docker (backend is at /app/backend) or locally (../backend).
if Path('/app/backend').exists():
    backend_dir = Path('/app/backend')
else:
    backend_dir = Path(__file__).parent.parent / 'backend'
sys.path.insert(0, str(backend_dir))

# Import tracking modules (must come after the sys.path tweak above)
from services import tracking_service  # noqa: E402
from tracking_integration import (  # noqa: E402
    generate_tracking_urls,
    inject_tracking_pixel,
    replace_article_links,
)

# Load environment variables from backend/.env.
# Reuse the backend directory detected above so the Docker location is
# honoured too, instead of recomputing a path relative to this file.
env_path = backend_dir / '.env'
if env_path.exists():
    load_dotenv(dotenv_path=env_path)
    print(f"✓ Loaded configuration from: {env_path}")
else:
    print(f"⚠  Warning: .env file not found at {env_path}")


class Config:
    """Configuration for news sender, read from environment variables."""

    # MongoDB
    MONGODB_URI = os.getenv('MONGODB_URI', 'mongodb://localhost:27017/')
    DB_NAME = 'munich_news'

    # Email
    SMTP_SERVER = os.getenv('SMTP_SERVER', 'smtp.gmail.com')
    SMTP_PORT = int(os.getenv('SMTP_PORT', '587'))
    EMAIL_USER = os.getenv('EMAIL_USER', '')
    EMAIL_PASSWORD = os.getenv('EMAIL_PASSWORD', '')

    # Newsletter
    MAX_ARTICLES = int(os.getenv('NEWSLETTER_MAX_ARTICLES', '10'))
    HOURS_LOOKBACK = int(os.getenv('NEWSLETTER_HOURS_LOOKBACK', '24'))
    WEBSITE_URL = os.getenv('WEBSITE_URL', 'http://localhost:3000')

    # Tracking
    TRACKING_ENABLED = os.getenv('TRACKING_ENABLED', 'true').lower() == 'true'
    TRACKING_API_URL = os.getenv('TRACKING_API_URL', 'http://localhost:5001')
    TRACKING_DATA_RETENTION_DAYS = int(os.getenv('TRACKING_DATA_RETENTION_DAYS', '90'))


# MongoDB connection
client = MongoClient(Config.MONGODB_URI)
db = client[Config.DB_NAME]
articles_collection = db['articles']
subscribers_collection = db['subscribers']


def get_latest_articles(max_articles=10, hours=24):
    """
    Get latest articles with AI summaries from database (from today only)

    Args:
        max_articles: Maximum number of articles to fetch from the database
        hours: Accepted for backward compatibility; currently ignored because
            the selection is "since 00:00 UTC today", not a rolling window

    Returns:
        list: Article dicts (title, author, link, summary, source,
            published_at), newest first by created_at. May hold fewer than
            max_articles because stale documents are dropped after the
            limited query.
    """
    # Start of today (00:00:00 UTC) as a naive datetime.
    # NOTE(review): assumes stored dates are naive UTC — confirm.
    today_start = datetime.now(timezone.utc).replace(
        tzinfo=None, hour=0, minute=0, second=0, microsecond=0
    )

    # Articles with a summary that were published today OR created today
    # (created_at is the fallback when published_at is missing).
    cursor = articles_collection.find({
        'summary': {'$exists': True, '$ne': None},
        '$or': [
            {'published_at': {'$gte': today_start}},
            {'created_at': {'$gte': today_start}},
        ],
    }).sort('created_at', -1).limit(max_articles)

    articles = []
    for doc in cursor:
        published_at = doc.get('published_at')
        created_at = doc.get('created_at')

        # Extra safety check: published_at wins when it is a datetime,
        # otherwise fall back to created_at. Documents whose deciding date
        # is before today are skipped.
        if isinstance(published_at, datetime):
            reference = published_at
        elif isinstance(created_at, datetime):
            reference = created_at
        else:
            reference = None
        if reference is not None and reference < today_start:
            continue

        articles.append({
            'title': doc.get('title', ''),
            'author': doc.get('author'),
            'link': doc.get('link', ''),
            'summary': doc.get('summary', ''),
            'source': doc.get('source', ''),
            'published_at': doc.get('published_at', ''),
        })

    return articles


def get_active_subscribers():
    """
    Get all active subscribers from database

    Returns:
        list: Email addresses of subscribers whose status is 'active'.
            Documents without a usable 'email' value are skipped instead of
            aborting the whole run with a KeyError.
    """
    cursor = subscribers_collection.find({'status': 'active'})
    return [doc['email'] for doc in cursor if doc.get('email')]


def render_newsletter_html(articles, tracking_enabled=False, pixel_tracking_id=None,
                           link_tracking_map=None, api_url=None):
    """
    Render newsletter HTML from template with optional tracking integration

    Args:
        articles: List of article dictionaries
        tracking_enabled: Whether to inject tracking pixel and replace links
        pixel_tracking_id: Tracking ID for the email open pixel
        link_tracking_map: Dictionary mapping original URLs to tracking IDs
        api_url: Base URL for the tracking API

    Returns:
        str: Rendered HTML content with tracking injected if enabled
    """
    # Load template (expected next to this file)
    template_path = Path(__file__).parent / 'newsletter_template.html'
    with open(template_path, 'r', encoding='utf-8') as f:
        template_content = f.read()

    # NOTE(review): jinja2.Template does not autoescape by default, so
    # article fields are inserted as raw HTML unless the template escapes
    # them itself. Articles come from crawled sources — confirm the template
    # handles escaping before trusting this.
    template = Template(template_content)

    # Prepare template data
    now = datetime.now()
    template_data = {
        'date': now.strftime('%A, %B %d, %Y'),
        'year': now.year,
        'article_count': len(articles),
        'articles': articles,
        'unsubscribe_link': f'{Config.WEBSITE_URL}/unsubscribe',
        'website_link': Config.WEBSITE_URL,
        'tracking_enabled': tracking_enabled,
    }

    # Render HTML
    html = template.render(**template_data)

    # Tracking is only injected when the flag, the pixel ID and the API URL
    # are all present; link rewriting additionally needs a link map.
    if tracking_enabled and pixel_tracking_id and api_url:
        html = inject_tracking_pixel(html, pixel_tracking_id, api_url)
        if link_tracking_map:
            html = replace_article_links(html, link_tracking_map, api_url)

    return html


def send_email(to_email, subject, html_content):
    """
    Send email to a single recipient

    Args:
        to_email: Recipient email address
        subject: Email subject
        html_content: HTML content of email

    Returns:
        tuple: (success: bool, error: str or None). Failures are reported via
            the tuple; no exception is propagated to the caller.
    """
    try:
        msg = MIMEMultipart('alternative')
        msg['Subject'] = subject
        msg['From'] = f'Munich News Daily <{Config.EMAIL_USER}>'
        msg['To'] = to_email
        # formatdate() emits an RFC 5322 date including the UTC offset; a
        # naive datetime formatted with %z yields no offset at all.
        msg['Date'] = formatdate(localtime=True)
        # make_msgid() builds a unique, well-formed ID; embedding the
        # recipient address would put a second '@' into the header.
        msg['Message-ID'] = make_msgid(domain='dongho.kim')
        msg['X-Mailer'] = 'Munich News Daily Sender'

        # Add plain text version as fallback
        plain_text = "This email requires HTML support. Please view it in an HTML-capable email client."
        msg.attach(MIMEText(plain_text, 'plain', 'utf-8'))

        # Add HTML version
        msg.attach(MIMEText(html_content, 'html', 'utf-8'))

        # The context manager closes the connection even when STARTTLS,
        # login or sending fails.
        with smtplib.SMTP(Config.SMTP_SERVER, Config.SMTP_PORT) as server:
            server.starttls()
            server.login(Config.EMAIL_USER, Config.EMAIL_PASSWORD)
            server.send_message(msg)

        return True, None
    except Exception as e:
        # Boundary handler: one failed recipient must not abort the batch.
        return False, str(e)


def _render_for_subscriber(articles, newsletter_id, email):
    """Render the newsletter for one subscriber, falling back to an untracked
    version when tracking is disabled or tracking setup fails."""
    if not Config.TRACKING_ENABLED:
        return render_newsletter_html(articles)

    try:
        tracking_data = generate_tracking_urls(
            articles=articles,
            newsletter_id=newsletter_id,
            subscriber_email=email,
            tracking_service=tracking_service,
        )
        return render_newsletter_html(
            articles=articles,
            tracking_enabled=True,
            pixel_tracking_id=tracking_data['pixel_tracking_id'],
            link_tracking_map=tracking_data['link_tracking_map'],
            api_url=Config.TRACKING_API_URL,
        )
    except Exception as e:
        # Deliberate best effort: delivery matters more than tracking.
        print(f"⚠ Tracking error: {e}, sending without tracking...", end=' ')
        return render_newsletter_html(articles)


def send_newsletter(max_articles=None, test_email=None):
    """
    Send newsletter to all active subscribers

    Args:
        max_articles: Maximum number of articles to include (default from config)
        test_email: If provided, send only to this email (for testing)

    Returns:
        dict: Statistics about sending. On an early abort:
            {'success': False, 'error': str}. Otherwise 'success' is True
            (even if individual sends failed) together with 'sent_count',
            'failed_count', 'total_subscribers', 'article_count' and
            'errors' (list of {'email', 'error'}).
    """
    print("\n" + "="*70)
    print("📧 Munich News Daily - Newsletter Sender")
    print("="*70)

    # Validate email configuration
    if not Config.EMAIL_USER or not Config.EMAIL_PASSWORD:
        print("❌ Email credentials not configured")
        print("   Set EMAIL_USER and EMAIL_PASSWORD in .env file")
        return {
            'success': False,
            'error': 'Email credentials not configured'
        }

    # Get articles from today only
    max_articles = max_articles or Config.MAX_ARTICLES
    today_date = datetime.now().strftime('%B %d, %Y')
    print(f"\nFetching articles published TODAY ({today_date})...")
    print(f"   Max articles: {max_articles}")
    articles = get_latest_articles(max_articles, hours=Config.HOURS_LOOKBACK)

    if not articles:
        print("❌ No articles from today with summaries found")
        print(f"   No articles published today ({today_date})")
        print("   Run the crawler with Ollama enabled to get fresh content")
        return {
            'success': False,
            'error': 'No articles published today'
        }

    print(f"✓ Found {len(articles)} recent article(s)")

    # Get subscribers
    if test_email:
        subscribers = [test_email]
        print(f"\n🧪 Test mode: Sending to {test_email} only")
    else:
        print("\nFetching active subscribers...")
        subscribers = get_active_subscribers()
        print(f"✓ Found {len(subscribers)} active subscriber(s)")

    if not subscribers:
        print("❌ No active subscribers found")
        return {
            'success': False,
            'error': 'No active subscribers'
        }

    # Generate newsletter ID (date-based)
    newsletter_id = f"newsletter-{datetime.now().strftime('%Y-%m-%d')}"

    # Send to subscribers
    subject = f"Munich News Daily - {datetime.now().strftime('%B %d, %Y')}"
    print(f"\nSending newsletter: '{subject}'")
    print(f"Newsletter ID: {newsletter_id}")
    print(f"Tracking enabled: {Config.TRACKING_ENABLED}")
    print("-" * 70)

    sent_count = 0
    failed_count = 0
    errors = []

    for i, email in enumerate(subscribers, 1):
        print(f"[{i}/{len(subscribers)}] Sending to {email}...", end=' ')

        # Tracking IDs are per subscriber, so the HTML is rendered per recipient.
        html_content = _render_for_subscriber(articles, newsletter_id, email)

        # Send email
        success, error = send_email(email, subject, html_content)

        if success:
            print("✓")
            sent_count += 1
        else:
            print(f"✗ {error}")
            failed_count += 1
            errors.append({'email': email, 'error': error})

    # Summary
    print("\n" + "="*70)
    print("📊 Sending Complete")
    print("="*70)
    print(f"✓ Successfully sent: {sent_count}")
    print(f"✗ Failed: {failed_count}")
    print(f"📰 Articles included: {len(articles)}")
    print("="*70 + "\n")

    return {
        'success': True,
        'sent_count': sent_count,
        'failed_count': failed_count,
        'total_subscribers': len(subscribers),
        'article_count': len(articles),
        'errors': errors
    }


def preview_newsletter(max_articles=None, hours=None):
    """
    Generate newsletter HTML for preview (doesn't send)

    Args:
        max_articles: Maximum number of articles to include
        hours: Hours to look back (default from config); passed through to
            get_latest_articles

    Returns:
        str: Newsletter HTML without tracking, or a short HTML notice when
            no articles are available
    """
    max_articles = max_articles or Config.MAX_ARTICLES
    hours = hours or Config.HOURS_LOOKBACK

    articles = get_latest_articles(max_articles, hours=hours)

    if not articles:
        today_date = datetime.now().strftime('%B %d, %Y')
        return (
            "<h1>No articles from today found</h1>"
            f"<p>No articles published today ({today_date}). "
            "Run the crawler with Ollama enabled to get fresh content.</p>"
        )

    # Preview without tracking
    return render_newsletter_html(articles, tracking_enabled=False)


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: Arguments after the script name; defaults to sys.argv[1:].

    Commands: 'preview' writes newsletter_preview.html, 'test <email>' sends
    to a single address, 'send [count]' sends to all subscribers. With no
    command the newsletter is sent to all subscribers. Usage errors exit
    with status 1.
    """
    args = sys.argv[1:] if argv is None else list(argv)

    if not args:
        # Default: send newsletter
        send_newsletter()
        return

    command = args[0]

    if command == 'preview':
        # Generate preview HTML
        html = preview_newsletter()
        output_file = 'newsletter_preview.html'
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(html)
        print(f"✓ Preview saved to {output_file}")
        print("   Open it in your browser to see the newsletter")

    elif command == 'test':
        # Send test email
        if len(args) < 2:
            print("Usage: python sender_service.py test <email>")
            sys.exit(1)
        send_newsletter(test_email=args[1])

    elif command == 'send':
        # Send to all subscribers
        max_articles = None
        if len(args) > 1:
            try:
                max_articles = int(args[1])
            except ValueError:
                # Report a usage error instead of crashing with a traceback.
                print(f"Invalid article count: {args[1]!r}")
                print("Usage: python sender_service.py send [count]")
                sys.exit(1)
        send_newsletter(max_articles=max_articles)

    else:
        print("Unknown command. Usage:")
        print("  python sender_service.py preview       - Generate HTML preview")
        print("  python sender_service.py test <email>  - Send test email")
        print("  python sender_service.py send [count]  - Send to all subscribers")
        # Non-zero status so shells and schedulers can detect the misuse.
        sys.exit(1)


if __name__ == '__main__':
    main()