This commit is contained in:
2025-11-11 16:58:03 +01:00
parent f23f4b71d8
commit 324751eb5d
14 changed files with 1108 additions and 18 deletions

View File

@@ -344,6 +344,21 @@ def crawl_rss_feed(feed_url, feed_name, feed_category='general', max_articles=10
article_data = extract_article_content(article_url)
if article_data and article_data.get('content'):
# Store original title
original_title = article_data.get('title') or entry.get('title', '')
# Translate title with Ollama if enabled
translation_result = None
if Config.OLLAMA_ENABLED and original_title:
print(f" 🌐 Translating title...")
translation_result = ollama_client.translate_title(original_title)
if translation_result and translation_result['success']:
print(f" ✓ Title translated ({translation_result['duration']:.1f}s)")
else:
error_msg = translation_result['error'] if translation_result else 'Unknown error'
print(f" ⚠ Translation failed: {error_msg}")
# Summarize with Ollama if enabled
summary_result = None
if Config.OLLAMA_ENABLED and article_data.get('content'):
@@ -362,7 +377,8 @@ def crawl_rss_feed(feed_url, feed_name, feed_category='general', max_articles=10
# Prepare document
article_doc = {
'title': article_data.get('title') or entry.get('title', ''),
'title': original_title,
'title_en': translation_result['translated_title'] if translation_result and translation_result['success'] else None,
'author': article_data.get('author'),
'link': article_url,
'content': article_data.get('content', ''), # Full article content
@@ -373,6 +389,7 @@ def crawl_rss_feed(feed_url, feed_name, feed_category='general', max_articles=10
'category': feed_category,
'published_at': extract_published_date(entry) or article_data.get('published_date', ''),
'crawled_at': article_data.get('crawled_at'),
'translated_at': datetime.utcnow() if translation_result and translation_result['success'] else None,
'summarized_at': datetime.utcnow() if summary_result and summary_result['success'] else None,
'created_at': datetime.utcnow()
}