from datetime import datetime, timezone

import feedparser
from pymongo.errors import DuplicateKeyError

from database import articles_collection, rss_feeds_collection
from utils.rss_utils import (
    extract_article_summary,
    extract_article_url,
    extract_published_date,
)


def get_active_rss_feeds():
    """Return the name and URL of every RSS feed marked active in the database.

    Returns:
        A list of ``{'name': ..., 'url': ...}`` dicts, one per document in
        ``rss_feeds_collection`` whose ``active`` field is ``True``. A missing
        ``name`` or ``url`` field is reported as an empty string.
    """
    return [
        {'name': feed.get('name', ''), 'url': feed.get('url', '')}
        for feed in rss_feeds_collection.find({'active': True})
    ]


def fetch_munich_news(max_per_source=5, summary_limit=200):
    """Fetch the newest entries from every active RSS feed.

    Args:
        max_per_source: Maximum number of leading entries taken from each
            feed. Defaults to 5.
        summary_limit: Maximum number of summary characters kept; longer
            summaries are cut to this length and suffixed with ``'...'``.
            Defaults to 200.

    Returns:
        A list of dicts with the keys ``title``, ``link``, ``summary``,
        ``source`` and ``published``. Entries for which no article URL can
        be extracted are skipped.
    """
    articles = []

    # Feed sources come from the database rather than a hardcoded list.
    for source in get_active_rss_feeds():
        try:
            feed = feedparser.parse(source['url'])

            for entry in feed.entries[:max_per_source]:
                article_url = extract_article_url(entry)
                if not article_url:
                    print(f" ⚠ No valid URL for: {entry.get('title', 'Unknown')[:50]}")
                    continue  # Without a link the article cannot be stored.

                summary = extract_article_summary(entry)
                # Falsy summaries (None / '') are passed through unchanged.
                if summary and len(summary) > summary_limit:
                    summary = summary[:summary_limit] + '...'

                articles.append({
                    'title': entry.get('title', ''),
                    'link': article_url,
                    'summary': summary,
                    'source': source['name'],
                    'published': extract_published_date(entry),
                })
        except Exception as e:
            # Best effort: a failing source must not abort the remaining ones.
            print(f"Error fetching from {source['name']}: {e}")

    return articles


def save_articles_to_db(articles):
    """Insert articles into MongoDB, leaving already-stored links untouched.

    Each article is upserted keyed on its ``link``. Because the document is
    written with ``$setOnInsert``, an existing document with the same link is
    never modified. Articles with an empty or missing link are skipped, since
    they would otherwise all be merged into a single ``link == ''`` document.

    Args:
        articles: Iterable of dicts as produced by ``fetch_munich_news``
            (keys ``title``, ``link``, ``summary``, ``source``, ``published``).
    """
    saved_count = 0

    for article in articles:
        link = article.get('link', '')
        if not link:
            print(f"Skipping article without link: {article.get('title', 'unknown')}")
            continue

        try:
            article_doc = {
                'title': article.get('title', ''),
                'link': link,
                'summary': article.get('summary', ''),
                'source': article.get('source', ''),
                'published_at': article.get('published', ''),
                # Timezone-aware replacement for the deprecated utcnow().
                # NOTE(review): expected to store the same UTC instant as
                # before; confirm against the PyMongo datetime docs.
                'created_at': datetime.now(timezone.utc),
            }

            # Upsert keyed on the link: inserts when the link is new and is a
            # no-op otherwise ($setOnInsert only applies on insert).
            result = articles_collection.update_one(
                {'link': link},
                {'$setOnInsert': article_doc},
                upsert=True,
            )

            if result.upserted_id:
                saved_count += 1

        except DuplicateKeyError:
            # Link already exists, skip.
            pass
        except Exception as e:
            print(f"Error saving article {article.get('link', 'unknown')}: {e}")

    if saved_count > 0:
        print(f"Saved {saved_count} new articles to database")