This commit is contained in:
2025-11-10 19:13:33 +01:00
commit ac5738c29d
64 changed files with 9445 additions and 0 deletions

View File

@@ -0,0 +1,90 @@
import feedparser
from datetime import datetime
from pymongo.errors import DuplicateKeyError
from database import articles_collection, rss_feeds_collection
from utils.rss_utils import extract_article_url, extract_article_summary, extract_published_date
def get_active_rss_feeds():
    """Return the name/url pairs of every RSS feed marked active in the database.

    Returns:
        list[dict]: one ``{'name': ..., 'url': ...}`` dict per active feed;
        missing fields default to the empty string.
    """
    # Build the result directly from the cursor over active-feed documents.
    return [
        {'name': doc.get('name', ''), 'url': doc.get('url', '')}
        for doc in rss_feeds_collection.find({'active': True})
    ]
def fetch_munich_news():
    """Fetch news from Munich news sources.

    Reads the active RSS feed list from the database, parses each feed, and
    collects up to five entries per source.

    Returns:
        list[dict]: article dicts with keys 'title', 'link', 'summary',
        'source', and 'published'. Feeds that fail to parse are reported
        and skipped.
    """
    collected = []
    # Feed list comes from the database rather than a hardcoded list.
    for feed_info in get_active_rss_feeds():
        try:
            parsed = feedparser.parse(feed_info['url'])
            # Only the top five entries of each source are considered.
            for item in parsed.entries[:5]:
                url = extract_article_url(item)
                if not url:
                    # Entries without a usable link are skipped outright.
                    print(f" ⚠ No valid URL for: {item.get('title', 'Unknown')[:50]}")
                    continue
                text = extract_article_summary(item)
                if text:
                    # Cap long summaries at 200 characters plus an ellipsis.
                    text = text[:200] + '...' if len(text) > 200 else text
                collected.append({
                    'title': item.get('title', ''),
                    'link': url,
                    'summary': text,
                    'source': feed_info['name'],
                    'published': extract_published_date(item),
                })
        except Exception as e:
            # Best-effort: a broken feed must not stop the other sources.
            print(f"Error fetching from {feed_info['name']}: {e}")
    return collected
def save_articles_to_db(articles):
    """Save articles to MongoDB, avoiding duplicates.

    Args:
        articles: list of dicts with 'title', 'link', 'summary', 'source',
            and 'published' keys (as produced by ``fetch_munich_news``).

    Side effects:
        Upserts one document per article into ``articles_collection`` keyed
        on 'link'; prints the number of newly inserted articles when > 0.
    """
    saved_count = 0
    for article in articles:
        try:
            # Prepare article document
            article_doc = {
                'title': article.get('title', ''),
                'link': article.get('link', ''),
                'summary': article.get('summary', ''),
                'source': article.get('source', ''),
                'published_at': article.get('published', ''),
                # datetime.utcnow() is deprecated (3.12+) and returns a naive
                # value; use an aware UTC timestamp instead. PyMongo stores
                # aware datetimes as UTC, so the persisted value is unchanged.
                'created_at': datetime.now(timezone.utc)
            }
            # Use update_one with upsert to handle duplicates
            # This will insert if link doesn't exist, or update if it does
            result = articles_collection.update_one(
                {'link': article_doc['link']},
                {'$setOnInsert': article_doc},  # Only set on insert, don't update existing
                upsert=True
            )
            if result.upserted_id:
                saved_count += 1
        except DuplicateKeyError:
            # A concurrent insert can still race the upsert; safe to skip.
            pass
        except Exception as e:
            print(f"Error saving article {article.get('link', 'unknown')}: {e}")
    if saved_count > 0:
        print(f"Saved {saved_count} new articles to database")