update
@@ -81,13 +81,14 @@ subscribers_collection = db['subscribers']
 def get_latest_articles(max_articles=10, hours=24):
     """
     Get latest articles with AI summaries from database (from today only)
+    Includes cluster information for articles with multiple sources
 
     Args:
        max_articles: Maximum number of articles to return
        hours: Number of hours to look back (default 24)
 
     Returns:
-        list: Articles with summaries published today
+        list: Articles with summaries published today, including cluster info
     """
     from datetime import timedelta
 
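For orientation, a hypothetical caller showing the shape of what this function returns (the field names match the dicts built in the hunks below):

    latest = get_latest_articles(max_articles=5)
    for a in latest:
        if a.get('is_clustered'):
            # Clustered stories carry a source count and a sources list
            print(f"{a['title']} ({a['article_count']} sources)")
        else:
            print(f"{a['title']} - {a['source']}")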
@@ -97,6 +98,9 @@ def get_latest_articles(max_articles=10, hours=24):
     # Get start of today (00:00:00 UTC)
     today_start = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
 
+    # Get cluster summaries collection
+    cluster_summaries_collection = db['cluster_summaries']
+
     # Query for articles with summaries published today OR created today
     # This ensures we only get fresh articles from today
     cursor = articles_collection.find({
@@ -110,6 +114,8 @@ def get_latest_articles(max_articles=10, hours=24):
     }).sort('created_at', -1).limit(max_articles)
 
     articles = []
+    processed_clusters = set()
+
     for doc in cursor:
         # Double-check the date to ensure it's from today
         published_at = doc.get('published_at')
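The filter body passed to find() falls between these two hunks, so the diff never shows it. Purely as a hypothetical illustration of what the comments describe ("published today OR created today"), a PyMongo filter of roughly this shape would fit; the actual fields and conditions in the repository may differ:

    # Hypothetical sketch only - the real query body is not part of this diff
    cursor = articles_collection.find({
        'summary': {'$exists': True, '$ne': ''},
        '$or': [
            {'published_at': {'$gte': today_start}},
            {'created_at': {'$gte': today_start}}
        ]
    }).sort('created_at', -1).limit(max_articles)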
@@ -123,16 +129,77 @@ def get_latest_articles(max_articles=10, hours=24):
         if created_at < today_start:
             continue
 
-        articles.append({
-            'title': doc.get('title', ''),
-            'title_en': doc.get('title_en'),
-            'translated_at': doc.get('translated_at'),
-            'author': doc.get('author'),
-            'link': doc.get('link', ''),
-            'summary': doc.get('summary', ''),
-            'source': doc.get('source', ''),
-            'published_at': doc.get('published_at', '')
-        })
+        cluster_id = doc.get('cluster_id')
+
+        # Check if this article is part of a cluster
+        if cluster_id and cluster_id not in processed_clusters:
+            # Get cluster summary
+            cluster = cluster_summaries_collection.find_one({'cluster_id': cluster_id})
+
+            if cluster and cluster.get('article_count', 0) > 1:
+                # This is a clustered article - get all source links
+                processed_clusters.add(cluster_id)
+
+                # Get all articles in this cluster
+                cluster_articles = list(articles_collection.find({
+                    'cluster_id': cluster_id
+                }))
+
+                # Build sources list with links
+                sources = []
+                for art in cluster_articles:
+                    sources.append({
+                        'name': art.get('source', ''),
+                        'link': art.get('link', ''),
+                        'title': art.get('title', '')
+                    })
+
+                articles.append({
+                    'title': doc.get('title', ''),
+                    'title_en': doc.get('title_en'),
+                    'translated_at': doc.get('translated_at'),
+                    'author': doc.get('author'),
+                    'link': doc.get('link', ''),
+                    'summary': cluster.get('neutral_summary', doc.get('summary', '')),
+                    'source': doc.get('source', ''),
+                    'published_at': doc.get('published_at', ''),
+                    'is_clustered': True,
+                    'sources': sources,
+                    'article_count': len(sources)
+                })
+            else:
+                # Single article (no cluster or cluster with only 1 article)
+                articles.append({
+                    'title': doc.get('title', ''),
+                    'title_en': doc.get('title_en'),
+                    'translated_at': doc.get('translated_at'),
+                    'author': doc.get('author'),
+                    'link': doc.get('link', ''),
+                    'summary': doc.get('summary', ''),
+                    'source': doc.get('source', ''),
+                    'published_at': doc.get('published_at', ''),
+                    'is_clustered': False
+                })
+        elif not cluster_id:
+            # No cluster - single article (clustered articles already
+            # handled above are skipped, which deduplicates the list)
+            articles.append({
+                'title': doc.get('title', ''),
+                'title_en': doc.get('title_en'),
+                'translated_at': doc.get('translated_at'),
+                'author': doc.get('author'),
+                'link': doc.get('link', ''),
+                'summary': doc.get('summary', ''),
+                'source': doc.get('source', ''),
+                'published_at': doc.get('published_at', ''),
+                'is_clustered': False
+            })
+
+    # Sort articles: clustered articles first (by source count), then by recency
+    # This prioritizes stories covered by multiple sources
+    articles.sort(key=lambda x: (
+        0 if x.get('is_clustered') else 1,  # Clustered first
+        -x.get('article_count', 1)  # More sources = higher priority
+    ))
 
     return articles
 
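Because list.sort is stable, articles that tie on this key keep the newest-first order from the cursor, which is what "then by recency" relies on. A tiny self-contained check of the key ordering, using illustrative dicts:

    sample = [
        {'title': 'solo', 'is_clustered': False},
        {'title': 'big story', 'is_clustered': True, 'article_count': 4},
        {'title': 'pair', 'is_clustered': True, 'article_count': 2},
    ]
    sample.sort(key=lambda x: (0 if x.get('is_clustered') else 1,
                               -x.get('article_count', 1)))
    print([s['title'] for s in sample])  # ['big story', 'pair', 'solo']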
@@ -170,13 +237,19 @@ def render_newsletter_html(articles, tracking_enabled=False, pixel_tracking_id=N
 
     template = Template(template_content)
 
+    # Split articles into sections
+    # Top 3 are "trending", rest are "other articles"
+    trending_articles = articles[:3] if len(articles) >= 3 else articles
+    other_articles = articles[3:] if len(articles) > 3 else []
+
     # Prepare template data
     now = datetime.now()
     template_data = {
         'date': now.strftime('%A, %B %d, %Y'),
         'year': now.year,
         'article_count': len(articles),
         'articles': articles,
+        'trending_articles': trending_articles,
+        'other_articles': other_articles,
         'unsubscribe_link': f'{Config.WEBSITE_URL}/unsubscribe',
         'website_link': Config.WEBSITE_URL,
         'tracking_enabled': tracking_enabled
 
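The hunk ends inside template_data, so the closing brace and the render call are not shown. Assuming Template here is jinja2.Template, a minimal, self-contained sketch of how a template could consume the new trending/other sections (the real newsletter template is not part of this diff):

    from jinja2 import Template

    demo = Template(
        '<h2>Trending</h2>'
        '{% for a in trending_articles %}<p>{{ a.title }}</p>{% endfor %}'
        '<h2>More stories</h2>'
        '{% for a in other_articles %}<p>{{ a.title }}</p>{% endfor %}'
    )
    html = demo.render(trending_articles=[{'title': 'A'}],
                       other_articles=[{'title': 'B'}])
    print(html)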