"""Flask routes for the news feed, single-article lookup and statistics."""
import logging
from urllib.parse import unquote

from flask import Blueprint, jsonify, request

from database import articles_collection, db
from services.news_service import fetch_munich_news, save_articles_to_db

logger = logging.getLogger(__name__)

news_bp = Blueprint('news', __name__)

# Number of articles returned when the client does not ask for a specific limit.
DEFAULT_ARTICLE_LIMIT = 20
# Maximum number of content characters used as a preview when no summary exists.
PREVIEW_LENGTH = 200


def _iso(value):
    """Return ``value`` as an ISO-8601 string, or ``None`` when it is empty.

    Objects exposing ``isoformat()`` (e.g. ``datetime``) are formatted with it;
    anything else is passed through ``str`` so that a timestamp stored as text
    does not crash the request.
    """
    if not value:
        return None
    isoformat = getattr(value, 'isoformat', None)
    return isoformat() if callable(isoformat) else str(value)


def _error_response(exc):
    """Log the active exception and return the JSON 500 response for it."""
    # Must be called from inside an ``except`` block so the traceback is logged.
    logger.exception('Request to %s failed', request.path)
    return jsonify({'error': str(exc)}), 500


def _parse_limit(default=DEFAULT_ARTICLE_LIMIT):
    """Read the ``limit`` query parameter, falling back to ``default``.

    Non-numeric, zero and negative values fall back to ``default`` because the
    value is passed to a ``$limit`` aggregation stage, which requires a
    positive integer.
    """
    limit = request.args.get('limit', default, type=int)
    if limit is None or limit < 1:
        return default
    return limit


def _serialize_article(doc):
    """Convert an article document from the database into its API dict."""
    article = {
        'title': doc.get('title', ''),
        'author': doc.get('author'),
        'link': doc.get('link', ''),
        'source': doc.get('source', ''),
        'published': doc.get('published_at', ''),
        'word_count': doc.get('word_count'),
        'has_full_content': bool(doc.get('content')),
        'has_summary': bool(doc.get('summary')),
    }

    summary = doc.get('summary')
    content = doc.get('content')
    if summary:
        # Include the AI summary when one is stored.
        article['summary'] = summary
        article['summary_word_count'] = doc.get('summary_word_count')
        article['summarized_at'] = _iso(doc.get('summarized_at'))
    elif content:
        # Fallback: a short preview of the content. The ellipsis is only added
        # when text was actually cut off.
        preview = content[:PREVIEW_LENGTH]
        if len(content) > PREVIEW_LENGTH:
            preview += '...'
        article['preview'] = preview
    return article


def _merge_unique_by_link(*article_lists):
    """Concatenate article lists, keeping the first article seen per link.

    Articles without a link are dropped, as they cannot be deduplicated.
    """
    seen_links = set()
    merged = []
    for articles in article_lists:
        for article in articles:
            link = article.get('link', '')
            if link and link not in seen_links:
                seen_links.add(link)
                merged.append(article)
    return merged


@news_bp.route('/api/news', methods=['GET'])
def get_news():
    """Return the latest news as JSON.

    With ``?mode=clustered`` the request is delegated to
    ``get_clustered_news_internal``. Otherwise fresh articles are fetched and
    saved, then merged (deduplicated by link, fresh ones first) with the most
    recently created articles from the database.

    Returns:
        ``({'articles': [...]}, 200)`` on success, ``({'error': ...}, 500)``
        when any step raises.
    """
    try:
        if request.args.get('mode', 'all') == 'clustered':
            return get_clustered_news_internal()

        # Fetch fresh news and persist it before reading back from the database.
        # NOTE(review): if save_articles_to_db inserts these dicts as-is, PyMongo
        # may add an ObjectId ``_id`` to them, which jsonify cannot serialize by
        # default - confirm against the service implementation.
        articles = fetch_munich_news()
        save_articles_to_db(articles)

        # Newest first, by creation time.
        cursor = (
            articles_collection.find()
            .sort('created_at', -1)
            .limit(DEFAULT_ARTICLE_LIMIT)
        )
        db_articles = [_serialize_article(doc) for doc in cursor]

        # Fresh articles go first so they win over their stored duplicates.
        combined = _merge_unique_by_link(articles, db_articles)
        return jsonify({'articles': combined[:DEFAULT_ARTICLE_LIMIT]}), 200
    except Exception as e:
        return _error_response(e)


def get_clustered_news_internal():
    """Return one primary article per story, with a neutral summary if available.

    Primary articles are joined with every article sharing their
    ``cluster_id`` and sorted by the number of distinct sources (descending),
    then by ``published_at`` (descending). The optional ``limit`` query
    parameter caps the number of stories (default ``DEFAULT_ARTICLE_LIMIT``).

    Returns:
        ``({'articles': [...], 'mode': 'clustered', 'description': ...}, 200)``
        on success, ``({'error': ...}, 500)`` when any step raises.
    """
    try:
        limit = _parse_limit()

        # The aggregation computes each cluster's size and distinct sources so
        # multi-source stories can be ranked first.
        pipeline = [
            {"$match": {"is_primary": True}},
            {"$lookup": {
                "from": "articles",
                "localField": "cluster_id",
                "foreignField": "cluster_id",
                "as": "cluster_articles",
            }},
            {"$addFields": {
                "article_count": {"$size": "$cluster_articles"},
                # $setUnion with an empty array deduplicates the source names.
                "sources_list": {"$setUnion": ["$cluster_articles.source", []]},
            }},
            {"$addFields": {
                "source_count": {"$size": "$sources_list"},
            }},
            # Sort by: 1) source count (desc), 2) published date (desc)
            {"$sort": {"source_count": -1, "published_at": -1}},
            {"$limit": limit},
        ]
        primary_docs = list(articles_collection.aggregate(pipeline))

        # Neutral summaries are only used for clusters with more than one
        # article, so fetch them for those clusters in a single query instead
        # of one query per story.
        multi_article_ids = [
            doc.get('cluster_id')
            for doc in primary_docs
            if doc.get('article_count', 1) > 1
        ]
        summaries_by_cluster = {}
        if multi_article_ids:
            summary_cursor = db['cluster_summaries'].find(
                {'cluster_id': {'$in': multi_article_ids}}
            )
            for summary_doc in summary_cursor:
                # Keep the first summary returned for each cluster.
                summaries_by_cluster.setdefault(
                    summary_doc.get('cluster_id'), summary_doc
                )

        result = []
        for doc in primary_docs:
            cluster_id = doc.get('cluster_id')
            is_multi_article = doc.get('article_count', 1) > 1
            cluster_summary = (
                summaries_by_cluster.get(cluster_id) if is_multi_article else None
            )

            article = {
                'title': doc.get('title', ''),
                'link': doc.get('link', ''),
                'source': doc.get('source', ''),
                'published': doc.get('published_at', ''),
                'category': doc.get('category', 'general'),
                'cluster_id': cluster_id,
                'article_count': doc.get('article_count', 1),
                'source_count': doc.get('source_count', 1),
                'sources': list(doc.get('sources_list', [doc.get('source', '')])),
            }

            # Use the neutral summary if available, otherwise the article's own.
            if cluster_summary:
                article['summary'] = cluster_summary.get('neutral_summary', '')
                article['summary_type'] = 'neutral'
                article['is_clustered'] = True
            else:
                article['summary'] = doc.get('summary', '')
                article['summary_type'] = 'individual'
                article['is_clustered'] = False

            # List the other articles of the cluster (everything but this one).
            if is_multi_article:
                article['related_articles'] = [
                    {
                        'source': related.get('source', ''),
                        'title': related.get('title', ''),
                        'link': related.get('link', ''),
                    }
                    for related in doc.get('cluster_articles', [])
                    if related.get('_id') != doc.get('_id')
                ]

            result.append(article)

        return jsonify({
            'articles': result,
            'mode': 'clustered',
            'description': 'Shows one article per story with neutral summaries',
        }), 200
    except Exception as e:
        return _error_response(e)


# The ``path`` converter is required because article URLs contain slashes.
@news_bp.route('/api/news/<path:article_url>', methods=['GET'])
def get_article_by_url(article_url):
    """Return the full stored article whose ``link`` matches ``article_url``.

    Args:
        article_url: Article link taken from the request path; it is
            percent-decoded before the lookup.

    Returns:
        The article as JSON with status 200, ``({'error': 'Article not
        found'}, 404)`` when no article has that link, or
        ``({'error': ...}, 500)`` when the lookup raises.
    """
    try:
        decoded_url = unquote(article_url)

        article = articles_collection.find_one({'link': decoded_url})
        if not article:
            return jsonify({'error': 'Article not found'}), 404

        return jsonify({
            'title': article.get('title', ''),
            'author': article.get('author'),
            'link': article.get('link', ''),
            'content': article.get('content', ''),
            'summary': article.get('summary'),
            'word_count': article.get('word_count', 0),
            'summary_word_count': article.get('summary_word_count'),
            'source': article.get('source', ''),
            'published_at': article.get('published_at', ''),
            'crawled_at': _iso(article.get('crawled_at')),
            'summarized_at': _iso(article.get('summarized_at')),
            'created_at': _iso(article.get('created_at')),
        }), 200
    except Exception as e:
        return _error_response(e)


@news_bp.route('/api/stats', methods=['GET'])
def get_stats():
    """Return subscriber, article and summary counts as JSON.

    Returns:
        A JSON object with the keys ``subscribers``, ``articles``,
        ``crawled_articles``, ``summarized_articles``, ``clustered_articles``
        and ``neutral_summaries`` with status 200, or ``({'error': ...}, 500)``
        when any count raises.
    """
    try:
        from database import subscribers_collection

        # A field counts as "filled" when it exists and is not the empty string.
        non_empty = {'$exists': True, '$ne': ''}

        return jsonify({
            # Only active subscribers are counted.
            'subscribers': subscribers_collection.count_documents(
                {'status': 'active'}
            ),
            'articles': articles_collection.count_documents({}),
            'crawled_articles': articles_collection.count_documents(
                {'content': non_empty}
            ),
            'summarized_articles': articles_collection.count_documents(
                {'summary': non_empty}
            ),
            'clustered_articles': articles_collection.count_documents(
                {'cluster_id': {'$exists': True}}
            ),
            'neutral_summaries': db['cluster_summaries'].count_documents({}),
        }), 200
    except Exception as e:
        return _error_response(e)