diff --git a/news_sender/newsletter_template.html b/news_sender/newsletter_template.html
index d4e95a1..95193b0 100644
--- a/news_sender/newsletter_template.html
+++ b/news_sender/newsletter_template.html
@@ -50,8 +50,27 @@
-    {% for article in articles %}
+    {% if trending_articles %}
+      🔥 Top Trending in Munich
+      The most talked-about stories today
+      {% for article in trending_articles %}
@@ -79,10 +98,14 @@
+        {% if article.is_clustered %}
+          Multiple sources
+        {% else %}
           {{ article.source }}
           {% if article.author %}
             • {{ article.author }}
           {% endif %}
+        {% endif %}
@@ -90,10 +113,25 @@
         {{ article.summary }}
+        {% if article.is_clustered and article.sources %}
+          📰 Covered by {{ article.article_count }} sources:
+          {% for source in article.sources %}
+            {{ source.name }} →
+          {% endfor %}
+        {% else %}
+          Read more →
+        {% endif %}
@@ -106,6 +144,110 @@
       {% endif %}
     {% endfor %}
+    {% endif %}
+
+    {% if other_articles %}
+      📰 More Stories
+      Additional news from around Munich
+      {% for article in other_articles %}
+        {{ loop.index + trending_articles|length }}
+        {{ article.title_en if article.title_en else article.title }}
+        {% if article.title_en and article.title_en != article.title %}
+          Original: {{ article.title }}
+        {% endif %}
+        {% if article.is_clustered %}
+          Multiple sources
+        {% else %}
+          {{ article.source }}
+          {% if article.author %}
+            • {{ article.author }}
+          {% endif %}
+        {% endif %}
+        {{ article.summary }}
+        {% if article.is_clustered and article.sources %}
+          📰 Covered by {{ article.article_count }} sources:
+          {% for source in article.sources %}
+            {{ source.name }} →
+          {% endfor %}
+        {% else %}
+          Read more →
+        {% endif %}
+        {% if not loop.last %}
+        {% endif %}
+      {% endfor %}
+    {% endif %}
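The markup in the template hunks above is heavily abbreviated, so here is a minimal rendering sketch of how the new context variables drive the output. It uses jinja2's `Template` directly, as `sender_service.py` does; the plain-text layout and the sample article dict are illustrative assumptions, not the newsletter's actual HTML.

```python
# Minimal rendering sketch (illustrative only: the real template is an HTML
# email layout; only the Jinja logic mirrors the diff above).
from jinja2 import Template

sketch = Template(
    "{% for article in trending_articles %}"
    "{{ loop.index }}. {{ article.title_en if article.title_en else article.title }}\n"
    "   {% if article.is_clustered %}Multiple sources{% else %}{{ article.source }}{% endif %}\n"
    "{% if article.is_clustered and article.sources %}"
    "   Covered by {{ article.article_count }} sources:\n"
    "{% for source in article.sources %}   - {{ source.name }}: {{ source.link }}\n{% endfor %}"
    "{% else %}   Read more: {{ article.link }}\n{% endif %}"
    "{% endfor %}"
)

# Hypothetical article shaped like the dicts built in get_latest_articles()
print(sketch.render(trending_articles=[{
    'title': 'Beispieltitel',
    'title_en': 'Example title',
    'link': 'https://example.com/a',
    'source': 'Source A',
    'is_clustered': True,
    'article_count': 2,
    'sources': [
        {'name': 'Source A', 'link': 'https://example.com/a'},
        {'name': 'Source B', 'link': 'https://example.com/b'},
    ],
}]))
```

Rendering a non-clustered article through the same sketch falls back to the single source name and a "Read more" link, matching the {% else %} branches above.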
diff --git a/news_sender/sender_service.py b/news_sender/sender_service.py
index ed5c0ea..8211896 100644
--- a/news_sender/sender_service.py
+++ b/news_sender/sender_service.py
@@ -81,13 +81,14 @@ subscribers_collection = db['subscribers']
 def get_latest_articles(max_articles=10, hours=24):
     """
     Get latest articles with AI summaries from database (from today only)
+    Includes cluster information for articles with multiple sources
 
     Args:
         max_articles: Maximum number of articles to return
         hours: Number of hours to look back (default 24)
 
     Returns:
-        list: Articles with summaries published today
+        list: Articles with summaries published today, including cluster info
     """
     from datetime import timedelta
 
@@ -97,6 +98,9 @@ def get_latest_articles(max_articles=10, hours=24):
     # Get start of today (00:00:00 UTC)
     today_start = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
 
+    # Get cluster summaries collection
+    cluster_summaries_collection = db['cluster_summaries']
+
     # Query for articles with summaries published today OR created today
     # This ensures we only get fresh articles from today
     cursor = articles_collection.find({
@@ -110,6 +114,8 @@ def get_latest_articles(max_articles=10, hours=24):
     }).sort('created_at', -1).limit(max_articles)
 
     articles = []
+    processed_clusters = set()
+
     for doc in cursor:
         # Double-check the date to ensure it's from today
         published_at = doc.get('published_at')
@@ -123,16 +129,77 @@
         if created_at < today_start:
             continue
 
-        articles.append({
-            'title': doc.get('title', ''),
-            'title_en': doc.get('title_en'),
-            'translated_at': doc.get('translated_at'),
-            'author': doc.get('author'),
-            'link': doc.get('link', ''),
-            'summary': doc.get('summary', ''),
-            'source': doc.get('source', ''),
-            'published_at': doc.get('published_at', '')
-        })
+        cluster_id = doc.get('cluster_id')
+
+        # Check if this article is part of a cluster
+        if cluster_id and cluster_id not in processed_clusters:
+            # Get cluster summary
+            cluster = cluster_summaries_collection.find_one({'cluster_id': cluster_id})
+
+            if cluster and cluster.get('article_count', 0) > 1:
+                # This is a clustered article - get all source links
+                processed_clusters.add(cluster_id)
+
+                # Get all articles in this cluster
+                cluster_articles = list(articles_collection.find({
+                    'cluster_id': cluster_id
+                }))
+
+                # Build sources list with links
+                sources = []
+                for art in cluster_articles:
+                    sources.append({
+                        'name': art.get('source', ''),
+                        'link': art.get('link', ''),
+                        'title': art.get('title', '')
+                    })
+
+                articles.append({
+                    'title': doc.get('title', ''),
+                    'title_en': doc.get('title_en'),
+                    'translated_at': doc.get('translated_at'),
+                    'author': doc.get('author'),
+                    'link': doc.get('link', ''),
+                    'summary': cluster.get('neutral_summary', doc.get('summary', '')),
+                    'source': doc.get('source', ''),
+                    'published_at': doc.get('published_at', ''),
+                    'is_clustered': True,
+                    'sources': sources,
+                    'article_count': len(sources)
+                })
+            else:
+                # Single article (no cluster or cluster with only 1 article)
+                articles.append({
+                    'title': doc.get('title', ''),
+                    'title_en': doc.get('title_en'),
+                    'translated_at': doc.get('translated_at'),
+                    'author': doc.get('author'),
+                    'link': doc.get('link', ''),
+                    'summary': doc.get('summary', ''),
+                    'source': doc.get('source', ''),
+                    'published_at': doc.get('published_at', ''),
+                    'is_clustered': False
+                })
+        elif not cluster_id:
+            # No cluster - single article
+            articles.append({
+                'title': doc.get('title', ''),
+                'title_en': doc.get('title_en'),
+                'translated_at': doc.get('translated_at'),
+                'author': doc.get('author'),
+                'link': doc.get('link', ''),
+                'summary': doc.get('summary', ''),
+                'source': doc.get('source', ''),
+                'published_at': doc.get('published_at', ''),
+                'is_clustered': False
+            })
+
+    # Sort articles: clustered stories first, then by source count; the query's
+    # newest-first order is preserved within each group because sort() is stable
+    articles.sort(key=lambda x: (
+        0 if x.get('is_clustered') else 1,   # Clustered first
+        -x.get('article_count', 1),          # More sources = higher priority
+    ))
 
     return articles
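For context on what the clustering branch above expects from the database: each article document may carry a `cluster_id`, and a matching document in `cluster_summaries` carries `article_count` and a `neutral_summary`. The sketch below reproduces the collapse with plain dicts instead of MongoDB collections; the sample documents and values are hypothetical.

```python
# MongoDB-free sketch of the cluster collapse performed in get_latest_articles().
# Field names follow the keys read above; the sample documents are hypothetical.
article_docs = [
    {'title': 'S-Bahn disruption', 'source': 'Source A', 'link': 'https://a.example/1', 'cluster_id': 'c1'},
    {'title': 'S-Bahn outage',     'source': 'Source B', 'link': 'https://b.example/1', 'cluster_id': 'c1'},
    {'title': 'New bike lanes',    'source': 'Source C', 'link': 'https://c.example/2', 'cluster_id': None},
]
cluster_summaries = {
    'c1': {'article_count': 2, 'neutral_summary': 'Combined summary of both reports.'},
}

entries, processed_clusters = [], set()
for doc in article_docs:
    cluster_id = doc.get('cluster_id')
    if cluster_id and cluster_id not in processed_clusters:
        cluster = cluster_summaries.get(cluster_id)
        if cluster and cluster.get('article_count', 0) > 1:
            # Collapse the whole cluster into one entry carrying every source link
            processed_clusters.add(cluster_id)
            siblings = [d for d in article_docs if d.get('cluster_id') == cluster_id]
            entries.append({
                'title': doc['title'],
                'summary': cluster['neutral_summary'],
                'is_clustered': True,
                'sources': [{'name': d['source'], 'link': d['link']} for d in siblings],
                'article_count': len(siblings),
            })
        else:
            entries.append({'title': doc['title'], 'summary': '', 'is_clustered': False})
    elif not cluster_id:
        entries.append({'title': doc['title'], 'summary': '', 'is_clustered': False})

# One clustered entry for 'c1' (two sources) plus one stand-alone article
print([(e['title'], e.get('article_count', 1)) for e in entries])
```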
@@ -170,13 +237,19 @@ def render_newsletter_html(articles, tracking_enabled=False, pixel_tracking_id=N
     template = Template(template_content)
 
+    # Split articles into sections
+    # Top 3 are "trending", rest are "other articles"
+    trending_articles = articles[:3]
+    other_articles = articles[3:]
+
     # Prepare template data
     now = datetime.now()
     template_data = {
         'date': now.strftime('%A, %B %d, %Y'),
         'year': now.year,
         'article_count': len(articles),
-        'articles': articles,
+        'trending_articles': trending_articles,
+        'other_articles': other_articles,
         'unsubscribe_link': f'{Config.WEBSITE_URL}/unsubscribe',
         'website_link': Config.WEBSITE_URL,
         'tracking_enabled': tracking_enabled
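Taken together, `get_latest_articles` orders multi-source clusters ahead of single-source stories and `render_newsletter_html` hands the first three entries to the `trending_articles` section and the rest to `other_articles`. A minimal sketch of the resulting ordering, using hypothetical entries that only share the keys produced above:

```python
# Sketch of the ordering fed into the template: clustered stories first,
# more sources first, newest-first preserved within each group (stable sort).
articles = [
    {'title': 'Solo story (newest)', 'is_clustered': False},
    {'title': 'Cluster of 2',        'is_clustered': True, 'article_count': 2},
    {'title': 'Solo story (older)',  'is_clustered': False},
    {'title': 'Cluster of 4',        'is_clustered': True, 'article_count': 4},
]

articles.sort(key=lambda x: (
    0 if x.get('is_clustered') else 1,   # clustered first
    -x.get('article_count', 1),          # more sources first
))

trending_articles = articles[:3]          # "Top Trending in Munich" section
other_articles = articles[3:]             # "More Stories" section

print([a['title'] for a in trending_articles])
# ['Cluster of 4', 'Cluster of 2', 'Solo story (newest)']
print([a['title'] for a in other_articles])
# ['Solo story (older)']
```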