update
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
from flask import Blueprint, jsonify
|
||||
from database import articles_collection
|
||||
from flask import Blueprint, jsonify, request
|
||||
from database import articles_collection, db
|
||||
from services.news_service import fetch_munich_news, save_articles_to_db
|
||||
|
||||
news_bp = Blueprint('news', __name__)
|
||||
@@ -9,6 +9,12 @@ news_bp = Blueprint('news', __name__)
|
||||
def get_news():
|
||||
"""Get latest Munich news"""
|
||||
try:
|
||||
# Check if clustered mode is requested
|
||||
mode = request.args.get('mode', 'all')
|
||||
|
||||
if mode == 'clustered':
|
||||
return get_clustered_news_internal()
|
||||
|
||||
# Fetch fresh news and save to database
|
||||
articles = fetch_munich_news()
|
||||
save_articles_to_db(articles)
|
||||
@@ -63,6 +69,95 @@ def get_news():
|
||||
return jsonify({'error': str(e)}), 500
|
||||
|
||||
|
||||
def get_clustered_news_internal():
    """
    Get news with neutral summaries for clustered articles.

    Returns only primary articles (``is_primary`` is true), each annotated
    with the size of its cluster and the distinct sources covering it.
    Stories covered by more sources are listed first; ties are broken by
    ``published_at``, newest first.

    Query parameters:
        limit: Maximum number of articles to return (positive integer,
            default 20).

    Returns:
        A ``(response, status)`` tuple:
        - 200 with ``articles``, ``mode`` and ``description`` on success,
        - 400 when ``limit`` is not a positive integer,
        - 500 with the error message when anything else fails.
    """
    # Validate ``limit`` up front: a non-numeric value used to surface as a
    # 500 from int(), and MongoDB rejects a non-positive ``$limit`` stage.
    try:
        limit = int(request.args.get('limit', 20))
    except (TypeError, ValueError):
        return jsonify({'error': "'limit' must be a positive integer"}), 400
    if limit < 1:
        return jsonify({'error': "'limit' must be a positive integer"}), 400

    try:
        # Use aggregation to get articles with their cluster size.
        # This allows us to prioritize multi-source stories.
        pipeline = [
            {"$match": {"is_primary": True}},
            {"$lookup": {
                "from": "articles",
                "localField": "cluster_id",
                "foreignField": "cluster_id",
                "as": "cluster_articles"
            }},
            {"$addFields": {
                "article_count": {"$size": "$cluster_articles"},
                # $setUnion with an empty array de-duplicates the sources.
                "sources_list": {"$setUnion": ["$cluster_articles.source", []]}
            }},
            {"$addFields": {
                "source_count": {"$size": "$sources_list"}
            }},
            # Sort by: 1) source count (desc), 2) published date (desc)
            {"$sort": {"source_count": -1, "published_at": -1}},
            {"$limit": limit}
        ]

        # Materialize the (limit-bounded) result so the cluster summaries
        # can be fetched in one query instead of one query per article.
        docs = list(articles_collection.aggregate(pipeline))

        # A neutral summary is only used for clusters with more than one
        # article, so only those cluster ids need to be looked up.
        multi_article_ids = list({
            doc.get('cluster_id')
            for doc in docs
            if doc.get('article_count', 1) > 1
        })

        summaries_by_cluster = {}
        if multi_article_ids:
            cluster_summaries_collection = db['cluster_summaries']
            summary_cursor = cluster_summaries_collection.find(
                {'cluster_id': {'$in': multi_article_ids}}
            )
            for summary in summary_cursor:
                # Keep the first summary per cluster, as find_one() would.
                summaries_by_cluster.setdefault(summary.get('cluster_id'), summary)

        result = [
            _build_clustered_article(
                doc, summaries_by_cluster.get(doc.get('cluster_id'))
            )
            for doc in docs
        ]

        return jsonify({
            'articles': result,
            'mode': 'clustered',
            'description': 'Shows one article per story with neutral summaries'
        }), 200
    except Exception as e:
        return jsonify({'error': str(e)}), 500


def _build_clustered_article(doc, cluster_summary):
    """Convert one aggregated primary-article document into its API payload.

    Args:
        doc: A document produced by the clustered-news aggregation; it
            carries ``cluster_articles``, ``article_count``, ``sources_list``
            and ``source_count`` in addition to the article's own fields.
        cluster_summary: The matching ``cluster_summaries`` document, or
            ``None`` when no neutral summary exists for the cluster.

    Returns:
        A dict describing the article for the JSON response.
    """
    article_count = doc.get('article_count', 1)
    is_multi_article = article_count > 1

    article = {
        'title': doc.get('title', ''),
        'link': doc.get('link', ''),
        'source': doc.get('source', ''),
        'published': doc.get('published_at', ''),
        'category': doc.get('category', 'general'),
        'cluster_id': doc.get('cluster_id'),
        'article_count': article_count,
        'source_count': doc.get('source_count', 1),
        'sources': list(doc.get('sources_list', [doc.get('source', '')]))
    }

    # Use neutral summary if available, otherwise use article's own summary
    if cluster_summary and is_multi_article:
        article['summary'] = cluster_summary.get('neutral_summary', '')
        article['summary_type'] = 'neutral'
        article['is_clustered'] = True
    else:
        article['summary'] = doc.get('summary', '')
        article['summary_type'] = 'individual'
        article['is_clustered'] = False

    # Add related articles info (every cluster member except the primary)
    if is_multi_article:
        primary_id = doc.get('_id')
        article['related_articles'] = [
            {
                'source': related.get('source', ''),
                'title': related.get('title', ''),
                'link': related.get('link', '')
            }
            for related in doc.get('cluster_articles', [])
            if related.get('_id') != primary_id
        ]

    return article
|
||||
|
||||
|
||||
@news_bp.route('/api/news/<path:article_url>', methods=['GET'])
|
||||
def get_article_by_url(article_url):
|
||||
"""Get full article content by URL"""
|
||||
@@ -113,11 +208,20 @@ def get_stats():
|
||||
# Count summarized articles
|
||||
summarized_count = articles_collection.count_documents({'summary': {'$exists': True, '$ne': ''}})
|
||||
|
||||
# Count clustered articles
|
||||
clustered_count = articles_collection.count_documents({'cluster_id': {'$exists': True}})
|
||||
|
||||
# Count cluster summaries
|
||||
cluster_summaries_collection = db['cluster_summaries']
|
||||
neutral_summaries_count = cluster_summaries_collection.count_documents({})
|
||||
|
||||
return jsonify({
|
||||
'subscribers': subscriber_count,
|
||||
'articles': article_count,
|
||||
'crawled_articles': crawled_count,
|
||||
'summarized_articles': summarized_count
|
||||
'summarized_articles': summarized_count,
|
||||
'clustered_articles': clustered_count,
|
||||
'neutral_summaries': neutral_summaries_count
|
||||
}), 200
|
||||
except Exception as e:
|
||||
return jsonify({'error': str(e)}), 500
|
||||
|
||||
Reference in New Issue
Block a user