diff --git a/news_sender/newsletter_template.html b/news_sender/newsletter_template.html
index d4e95a1..95193b0 100644
--- a/news_sender/newsletter_template.html
+++ b/news_sender/newsletter_template.html
@@ -50,8 +50,27 @@
-
- {% for article in articles %}
+
+ {% if trending_articles %}
+
+
+
+
+
+
+ 🔥 Top Trending in Munich
+
+
+ The most talked-about stories today
+
+ |
+
+
+ |
+
+
+
+ {% for article in trending_articles %}
|
@@ -79,10 +98,14 @@
+ {% if article.is_clustered %}
+ Multiple sources
+ {% else %}
{{ article.source }}
{% if article.author %}
• {{ article.author }}
{% endif %}
+ {% endif %}
@@ -90,10 +113,25 @@
{{ article.summary }}
-
+
+ {% if article.is_clustered and article.sources %}
+
+
+ 📰 Covered by {{ article.article_count }} sources:
+
+
+ {% else %}
+
Read more →
+ {% endif %}
|
@@ -106,6 +144,110 @@
{% endif %}
{% endfor %}
+ {% endif %}
+
+
+ {% if other_articles %}
+
+
+ |
+
+ |
+
+
+
+
+
+
+
+
+ 📰 More Stories
+
+
+ Additional news from around Munich
+
+ |
+
+
+ |
+
+
+
+ {% for article in other_articles %}
+
+
+
+
+
+ |
+
+ {{ loop.index + trending_articles|length }}
+
+ |
+
+
+
+
+
+ {{ article.title_en if article.title_en else article.title }}
+
+
+
+ {% if article.title_en and article.title_en != article.title %}
+
+ Original: {{ article.title }}
+
+ {% endif %}
+
+
+
+ {% if article.is_clustered %}
+ Multiple sources
+ {% else %}
+ {{ article.source }}
+ {% if article.author %}
+ • {{ article.author }}
+ {% endif %}
+ {% endif %}
+
+
+
+
+ {{ article.summary }}
+
+
+
+ {% if article.is_clustered and article.sources %}
+
+
+ 📰 Covered by {{ article.article_count }} sources:
+
+
+ {% else %}
+
+
+ Read more →
+
+ {% endif %}
+ |
+
+
+
+ {% if not loop.last %}
+
+ |
+
+ |
+
+ {% endif %}
+ {% endfor %}
+ {% endif %}
diff --git a/news_sender/sender_service.py b/news_sender/sender_service.py
index ed5c0ea..8211896 100644
--- a/news_sender/sender_service.py
+++ b/news_sender/sender_service.py
@@ -81,13 +81,14 @@ subscribers_collection = db['subscribers']
def get_latest_articles(max_articles=10, hours=24):
"""
Get latest articles with AI summaries from database (from today only)
+ Includes cluster information for articles with multiple sources
Args:
max_articles: Maximum number of articles to return
hours: Number of hours to look back (default 24)
Returns:
- list: Articles with summaries published today
+ list: Articles with summaries published today, including cluster info
"""
from datetime import timedelta
@@ -97,6 +98,9 @@ def get_latest_articles(max_articles=10, hours=24):
# Get start of today (00:00:00 UTC)
today_start = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
+ # Get cluster summaries collection
+ cluster_summaries_collection = db['cluster_summaries']
+
# Query for articles with summaries published today OR created today
# This ensures we only get fresh articles from today
cursor = articles_collection.find({
@@ -110,6 +114,8 @@ def get_latest_articles(max_articles=10, hours=24):
}).sort('created_at', -1).limit(max_articles)
articles = []
+ processed_clusters = set()
+
for doc in cursor:
# Double-check the date to ensure it's from today
published_at = doc.get('published_at')
@@ -123,16 +129,77 @@ def get_latest_articles(max_articles=10, hours=24):
if created_at < today_start:
continue
- articles.append({
- 'title': doc.get('title', ''),
- 'title_en': doc.get('title_en'),
- 'translated_at': doc.get('translated_at'),
- 'author': doc.get('author'),
- 'link': doc.get('link', ''),
- 'summary': doc.get('summary', ''),
- 'source': doc.get('source', ''),
- 'published_at': doc.get('published_at', '')
- })
+ cluster_id = doc.get('cluster_id')
+
+ # Check if this article is part of a cluster
+ if cluster_id and cluster_id not in processed_clusters:
+ # Get cluster summary
+ cluster = cluster_summaries_collection.find_one({'cluster_id': cluster_id})
+
+ if cluster and cluster.get('article_count', 0) > 1:
+ # This is a clustered article - get all source links
+ processed_clusters.add(cluster_id)
+
+ # Get all articles in this cluster
+ cluster_articles = list(articles_collection.find({
+ 'cluster_id': cluster_id
+ }))
+
+ # Build sources list with links
+ sources = []
+ for art in cluster_articles:
+ sources.append({
+ 'name': art.get('source', ''),
+ 'link': art.get('link', ''),
+ 'title': art.get('title', '')
+ })
+
+ articles.append({
+ 'title': doc.get('title', ''),
+ 'title_en': doc.get('title_en'),
+ 'translated_at': doc.get('translated_at'),
+ 'author': doc.get('author'),
+ 'link': doc.get('link', ''),
+ 'summary': cluster.get('neutral_summary') or doc.get('summary', ''),  # fall back when the cluster summary is missing, None or empty
+ 'source': doc.get('source', ''),
+ 'published_at': doc.get('published_at', ''),
+ 'is_clustered': True,
+ 'sources': sources,
+ 'article_count': len(sources)
+ })
+ else:
+ # Single article (no cluster or cluster with only 1 article)
+ articles.append({
+ 'title': doc.get('title', ''),
+ 'title_en': doc.get('title_en'),
+ 'translated_at': doc.get('translated_at'),
+ 'author': doc.get('author'),
+ 'link': doc.get('link', ''),
+ 'summary': doc.get('summary', ''),
+ 'source': doc.get('source', ''),
+ 'published_at': doc.get('published_at', ''),
+ 'is_clustered': False
+ })
+ elif not cluster_id or cluster_id not in processed_clusters:
+ # No cluster - single article
+ articles.append({
+ 'title': doc.get('title', ''),
+ 'title_en': doc.get('title_en'),
+ 'translated_at': doc.get('translated_at'),
+ 'author': doc.get('author'),
+ 'link': doc.get('link', ''),
+ 'summary': doc.get('summary', ''),
+ 'source': doc.get('source', ''),
+ 'published_at': doc.get('published_at', ''),
+ 'is_clustered': False
+ })
+
+ # Sort articles: clustered articles first (more sources ranks higher);
+ # the sort is stable, so ties keep the query's newest-first order
+ articles.sort(key=lambda x: (
+ 1 if x.get('is_clustered') else 0, # Clustered first
+ x.get('article_count', 1), # More sources = higher priority
+ ), reverse=True)
return articles
@@ -170,13 +237,19 @@ def render_newsletter_html(articles, tracking_enabled=False, pixel_tracking_id=N
template = Template(template_content)
+ # Split articles into sections
+ # Top 3 are "trending", rest are "other articles"
+ trending_articles = articles[:3] if len(articles) >= 3 else articles
+ other_articles = articles[3:] if len(articles) > 3 else []
+
# Prepare template data
now = datetime.now()
template_data = {
'date': now.strftime('%A, %B %d, %Y'),
'year': now.year,
'article_count': len(articles),
- 'articles': articles,
+ 'trending_articles': trending_articles,
+ 'other_articles': other_articles,
'unsubscribe_link': f'{Config.WEBSITE_URL}/unsubscribe',
'website_link': Config.WEBSITE_URL,
'tracking_enabled': tracking_enabled