This commit is contained in:
2025-11-11 16:58:03 +01:00
parent f23f4b71d8
commit 324751eb5d
14 changed files with 1108 additions and 18 deletions

View File

@@ -6,14 +6,25 @@ from dotenv import load_dotenv
from pathlib import Path
# Load environment variables from backend/.env
# Single-path lookup: <parent of this file's directory>/backend/.env
backend_dir = Path(__file__).parent.parent / 'backend'
env_path = backend_dir / '.env'
# Try multiple locations (Docker vs local)
# Candidates are checked in list order by the loop further down; the first
# existing file wins.
env_locations = [
Path('/app/.env'), # Docker location
Path(__file__).parent.parent / 'backend' / '.env', # Local location
# NOTE: Path(__file__).parent is the directory containing this file, which is
# not necessarily the process's working directory.
Path(__file__).parent / '.env', # Current directory
]
# NOTE(review): this if/else tests the single env_path assigned above, while
# the loop below rebinds env_path for each candidate. It looks like the removed
# and added sides of a diff shown together without +/- markers -- confirm which
# lines are current before relying on this section.
if env_path.exists():
load_dotenv(dotenv_path=env_path)
print(f"✓ Loaded configuration from: {env_path}")
else:
print(f"⚠ Warning: .env file not found at {env_path}")
# Walk the candidates in order and stop at the first file that exists;
# env_loaded records whether any candidate was loaded.
env_loaded = False
for env_path in env_locations:
if env_path.exists():
load_dotenv(dotenv_path=env_path)
print(f"✓ Loaded configuration from: {env_path}")
env_loaded = True
break
# No candidate existed: print a warning followed by every path that was tried.
if not env_loaded:
print(f"⚠ Warning: .env file not found in any of these locations:")
for loc in env_locations:
print(f" - {loc}")
class Config:

View File

@@ -344,6 +344,21 @@ def crawl_rss_feed(feed_url, feed_name, feed_category='general', max_articles=10
article_data = extract_article_content(article_url)
if article_data and article_data.get('content'):
# Store original title
original_title = article_data.get('title') or entry.get('title', '')
# Translate title with Ollama if enabled
translation_result = None
if Config.OLLAMA_ENABLED and original_title:
print(f" 🌐 Translating title...")
translation_result = ollama_client.translate_title(original_title)
if translation_result and translation_result['success']:
print(f" ✓ Title translated ({translation_result['duration']:.1f}s)")
else:
error_msg = translation_result['error'] if translation_result else 'Unknown error'
print(f" ⚠ Translation failed: {error_msg}")
# Summarize with Ollama if enabled
summary_result = None
if Config.OLLAMA_ENABLED and article_data.get('content'):
@@ -362,7 +377,8 @@ def crawl_rss_feed(feed_url, feed_name, feed_category='general', max_articles=10
# Prepare document
article_doc = {
'title': article_data.get('title') or entry.get('title', ''),
'title': original_title,
'title_en': translation_result['translated_title'] if translation_result and translation_result['success'] else None,
'author': article_data.get('author'),
'link': article_url,
'content': article_data.get('content', ''), # Full article content
@@ -373,6 +389,7 @@ def crawl_rss_feed(feed_url, feed_name, feed_category='general', max_articles=10
'category': feed_category,
'published_at': extract_published_date(entry) or article_data.get('published_date', ''),
'crawled_at': article_data.get('crawled_at'),
'translated_at': datetime.utcnow() if translation_result and translation_result['success'] else None,
'summarized_at': datetime.utcnow() if summary_result and summary_result['success'] else None,
'created_at': datetime.utcnow()
}

View File

@@ -160,6 +160,147 @@ class OllamaClient:
'duration': time.time() - start_time
}
def translate_title(self, title, target_language='English'):
    """
    Translate article title to target language

    Sends one non-streaming request to ``{base_url}/api/generate`` and
    reports every outcome through the returned dict; request and parsing
    failures are caught here and described in ``error``.

    Args:
        title: Original title (typically German)
        target_language: Target language (default: 'English')

    Returns:
        {
            'success': bool,                  # Whether translation succeeded
            'translated_title': str or None,  # Translated title
            'error': str or None,             # Error message if failed
            'duration': float                 # Time taken in seconds
        }
    """
    def _outcome(translated_title, error, started_at):
        # Single place that builds the result dict. Guard returns pass
        # started_at=None and report a zero duration.
        duration = 0 if started_at is None else time.time() - started_at
        return {
            'success': error is None,
            'translated_title': translated_title,
            'error': error,
            'duration': duration,
        }

    if not self.enabled:
        return _outcome(None, 'Ollama is not enabled', None)

    if not title or not title.strip():
        return _outcome(None, 'Title is empty', None)

    start_time = time.time()

    try:
        # Construct prompt
        prompt = self._build_translation_prompt(title, target_language)

        # Prepare request
        url = f"{self.base_url}/api/generate"
        headers = {'Content-Type': 'application/json'}
        if self.api_key:
            headers['Authorization'] = f'Bearer {self.api_key}'

        payload = {
            'model': self.model,
            'prompt': prompt,
            'stream': False,
            'options': {
                'temperature': 0.3,  # Lower temperature for consistent translations
                'num_predict': 100,  # Limit response length for title-length outputs
            },
        }

        # Make request
        response = requests.post(
            url,
            json=payload,
            headers=headers,
            timeout=self.timeout,
        )
        response.raise_for_status()

        # Parse response; `or ''` also covers an explicit JSON null
        raw_translation = (response.json().get('response') or '').strip()

        # Clean BEFORE checking for emptiness: output that is only quotes
        # (e.g. '""') cleans down to nothing and must count as a failure.
        translated_title = (
            self._clean_translation(raw_translation) if raw_translation else ''
        )
        if not translated_title:
            return _outcome(None, 'Ollama returned empty translation', start_time)

        return _outcome(translated_title, None, start_time)

    # Timeout is listed before ConnectionError because requests'
    # ConnectTimeout derives from both; it should be reported as a timeout.
    except requests.exceptions.Timeout:
        return _outcome(
            None,
            f'Request timed out after {self.timeout} seconds',
            start_time,
        )
    except requests.exceptions.ConnectionError:
        return _outcome(
            None,
            f'Cannot connect to Ollama server at {self.base_url}',
            start_time,
        )
    except requests.exceptions.HTTPError as e:
        # e.response can be None when the error carries no response object;
        # dereferencing it blindly would raise out of this handler.
        http_response = e.response
        if http_response is None:
            detail = str(e)
        else:
            detail = f'{http_response.status_code} - {http_response.text[:100]}'
        return _outcome(None, f'HTTP error: {detail}', start_time)
    except Exception as e:
        # Deliberate catch-all: callers rely on getting a result dict back
        # (e.g. for invalid JSON) rather than an exception.
        return _outcome(None, f'Unexpected error: {str(e)}', start_time)
def _build_translation_prompt(self, title, target_language):
    """Compose the instruction text sent to the model for one headline.

    The headline is placed on its own line and the prompt ends with a
    "<target_language> translation:" cue.
    """
    instruction = (
        f"Translate the following German news headline to {target_language}. "
        "Provide only the translation without any explanations, quotes, "
        "or additional text."
    )
    prompt_lines = [
        instruction,
        "German headline:",
        f"{title}",
        f"{target_language} translation:",
    ]
    return "\n".join(prompt_lines)
def _clean_translation(self, translation):
    """
    Clean translation output by removing quotes and extra text

    Keeps only the first non-blank line of the model output, then removes
    one pair of matching quotation marks (straight or typographic) that
    wraps that whole line.

    Args:
        translation: Raw text returned by the model

    Returns:
        The cleaned headline, or an empty string when nothing usable remains
    """
    # Opening/closing marks that may wrap an entire headline.
    quote_pairs = (
        ('"', '"'),
        ("'", "'"),
        ('\u201c', '\u201d'),  # English double curly quotes
        ('\u2018', '\u2019'),  # English single curly quotes
        ('\u201e', '\u201c'),  # German low-high double quotes
        ('\u00ab', '\u00bb'),  # guillemets
    )

    # Use the first NON-blank line: a leading newline in the output must not
    # produce an empty headline. Later lines are treated as extra text.
    headline = next(
        (
            line.strip()
            for line in (translation or '').split('\n')
            if line.strip()
        ),
        '',
    )

    # Strip at most one wrapping pair; quotes inside the headline are kept.
    for opening, closing in quote_pairs:
        if headline.startswith(opening) and headline.endswith(closing):
            headline = headline[len(opening):len(headline) - len(closing)]
            break

    # Trim whitespace again after quote removal
    return headline.strip()
def _build_summarization_prompt(self, content, max_words):
"""Build prompt for article summarization"""
# Truncate content if too long (keep first 5000 words)