Add news_service: fetch Munich news from DB-managed RSS feeds and save articles to MongoDB
backend/services/news_service.py · 90 additions · Normal file
@@ -0,0 +1,90 @@
import feedparser
from datetime import datetime
from pymongo.errors import DuplicateKeyError
from database import articles_collection, rss_feeds_collection
from utils.rss_utils import extract_article_url, extract_article_summary, extract_published_date


def get_active_rss_feeds():
    """Get all active RSS feeds from database"""
    feeds = []
    cursor = rss_feeds_collection.find({'active': True})
    for feed in cursor:
        feeds.append({
            'name': feed.get('name', ''),
            'url': feed.get('url', '')
        })
    return feeds
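
# Note: get_active_rss_feeds() assumes each rss_feeds_collection document
# carries 'name', 'url', and 'active' fields, as implied by the query and
# .get() calls above. A hypothetical seed document, for illustration only
# (not part of this commit):
#
#   rss_feeds_collection.insert_one({
#       'name': 'Example Munich Feed',
#       'url': 'https://example.com/muenchen/rss.xml',
#       'active': True
#   })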


def fetch_munich_news():
    """Fetch news from Munich news sources"""
    articles = []

    # Get RSS feeds from database instead of hardcoded list
    sources = get_active_rss_feeds()

    for source in sources:
        try:
            feed = feedparser.parse(source['url'])
            for entry in feed.entries[:5]:  # Get top 5 from each source
                # Extract article URL using utility function
                article_url = extract_article_url(entry)

                if not article_url:
                    print(f" ⚠ No valid URL for: {entry.get('title', 'Unknown')[:50]}")
                    continue  # Skip entries without a valid URL

                # Extract summary and truncate long ones to 200 characters
                summary = extract_article_summary(entry)
                if summary:
                    summary = summary[:200] + '...' if len(summary) > 200 else summary

                articles.append({
                    'title': entry.get('title', ''),
                    'link': article_url,
                    'summary': summary,
                    'source': source['name'],
                    'published': extract_published_date(entry)
                })
        except Exception as e:
            print(f"Error fetching from {source['name']}: {e}")

    return articles
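
# Note: feedparser.parse() generally does not raise on a malformed feed;
# it reports problems via feed.bozo / feed.bozo_exception instead, so the
# except clause above mainly guards against unexpected runtime errors.
# A sketch of an explicit check (assumption, not in this commit):
#
#   if feed.bozo:
#       print(f"Warning parsing {source['url']}: {feed.bozo_exception}")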


def save_articles_to_db(articles):
    """Save articles to MongoDB, avoiding duplicates"""
    saved_count = 0

    for article in articles:
        try:
            # Prepare article document
            article_doc = {
                'title': article.get('title', ''),
                'link': article.get('link', ''),
                'summary': article.get('summary', ''),
                'source': article.get('source', ''),
                'published_at': article.get('published', ''),
                'created_at': datetime.utcnow()
            }

            # Use update_one with upsert to handle duplicates:
            # insert if the link doesn't exist; if it does, $setOnInsert
            # leaves the existing document unchanged
            result = articles_collection.update_one(
                {'link': article_doc['link']},
                {'$setOnInsert': article_doc},  # Only set on insert, never overwrite
                upsert=True
            )

            if result.upserted_id:
                saved_count += 1

        except DuplicateKeyError:
            # Link already exists, skip
            pass
        except Exception as e:
            print(f"Error saving article {article.get('link', 'unknown')}: {e}")

    if saved_count > 0:
        print(f"Saved {saved_count} new articles to database")
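
# Note: the DuplicateKeyError handler above only fires if articles_collection
# has a unique index on 'link'; this commit does not create one. A one-time
# setup sketch (assumption, not part of this diff):
#
#   articles_collection.create_index('link', unique=True)
#
# Typical usage (sketch; scheduling/invocation lives outside this module):
#
#   articles = fetch_munich_news()
#   save_articles_to_db(articles)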