update
This commit is contained in:
@@ -6,14 +6,25 @@ from dotenv import load_dotenv
|
||||
from pathlib import Path
|
||||
|
||||
# Load environment variables from the first .env file that exists.
# Try multiple locations (Docker vs local); earlier entries take priority
# and the search stops at the first hit.
backend_dir = Path(__file__).parent.parent / 'backend'
env_locations = [
    Path('/app/.env'),  # Docker location
    backend_dir / '.env',  # Local location
    Path(__file__).parent / '.env',  # Directory containing this file
]

env_loaded = False
for env_path in env_locations:
    if env_path.exists():
        load_dotenv(dotenv_path=env_path)
        print(f"✓ Loaded configuration from: {env_path}")
        env_loaded = True
        break

if not env_loaded:
    # Not fatal: execution continues without a .env file, so only warn
    # and list every path that was probed to make misconfiguration obvious.
    print("⚠ Warning: .env file not found in any of these locations:")
    for loc in env_locations:
        print(f" - {loc}")
|
||||
|
||||
|
||||
class Config:
|
||||
|
||||
@@ -344,6 +344,21 @@ def crawl_rss_feed(feed_url, feed_name, feed_category='general', max_articles=10
|
||||
article_data = extract_article_content(article_url)
|
||||
|
||||
if article_data and article_data.get('content'):
|
||||
# Store original title
|
||||
original_title = article_data.get('title') or entry.get('title', '')
|
||||
|
||||
# Translate title with Ollama if enabled
|
||||
translation_result = None
|
||||
if Config.OLLAMA_ENABLED and original_title:
|
||||
print(f" 🌐 Translating title...")
|
||||
translation_result = ollama_client.translate_title(original_title)
|
||||
|
||||
if translation_result and translation_result['success']:
|
||||
print(f" ✓ Title translated ({translation_result['duration']:.1f}s)")
|
||||
else:
|
||||
error_msg = translation_result['error'] if translation_result else 'Unknown error'
|
||||
print(f" ⚠ Translation failed: {error_msg}")
|
||||
|
||||
# Summarize with Ollama if enabled
|
||||
summary_result = None
|
||||
if Config.OLLAMA_ENABLED and article_data.get('content'):
|
||||
@@ -362,7 +377,8 @@ def crawl_rss_feed(feed_url, feed_name, feed_category='general', max_articles=10
|
||||
|
||||
# Prepare document
|
||||
article_doc = {
|
||||
'title': article_data.get('title') or entry.get('title', ''),
|
||||
'title': original_title,
|
||||
'title_en': translation_result['translated_title'] if translation_result and translation_result['success'] else None,
|
||||
'author': article_data.get('author'),
|
||||
'link': article_url,
|
||||
'content': article_data.get('content', ''), # Full article content
|
||||
@@ -373,6 +389,7 @@ def crawl_rss_feed(feed_url, feed_name, feed_category='general', max_articles=10
|
||||
'category': feed_category,
|
||||
'published_at': extract_published_date(entry) or article_data.get('published_date', ''),
|
||||
'crawled_at': article_data.get('crawled_at'),
|
||||
'translated_at': datetime.utcnow() if translation_result and translation_result['success'] else None,
|
||||
'summarized_at': datetime.utcnow() if summary_result and summary_result['success'] else None,
|
||||
'created_at': datetime.utcnow()
|
||||
}
|
||||
|
||||
@@ -160,6 +160,147 @@ class OllamaClient:
|
||||
'duration': time.time() - start_time
|
||||
}
|
||||
|
||||
def translate_title(self, title, target_language='English'):
    """
    Translate article title to target language

    Request, HTTP and response-parsing failures are caught and reported
    through the returned dict instead of being raised.

    Args:
        title: Original title (typically German)
        target_language: Target language (default: 'English')

    Returns:
        {
            'success': bool,                  # Whether translation succeeded
            'translated_title': str or None,  # Translated title
            'error': str or None,             # Error message if failed
            'duration': float                 # Time taken in seconds
        }
    """
    def _failure(message, started=None):
        # Single place that builds the failure payload; duration is 0 when
        # the request was never attempted.
        return {
            'success': False,
            'translated_title': None,
            'error': message,
            'duration': time.time() - started if started is not None else 0
        }

    if not self.enabled:
        return _failure('Ollama is not enabled')

    if not title or not title.strip():
        return _failure('Title is empty')

    start_time = time.time()

    try:
        # Construct prompt
        prompt = self._build_translation_prompt(title, target_language)

        # Prepare request
        url = f"{self.base_url}/api/generate"
        headers = {'Content-Type': 'application/json'}
        if self.api_key:
            headers['Authorization'] = f'Bearer {self.api_key}'

        payload = {
            'model': self.model,
            'prompt': prompt,
            'stream': False,
            'options': {
                'temperature': 0.3,  # Lower temperature for consistent translations
                'num_predict': 100   # Limit response length for title-length outputs
            }
        }

        # Make request
        response = requests.post(
            url,
            json=payload,
            headers=headers,
            timeout=self.timeout
        )
        response.raise_for_status()

        # Parse response; a missing or null 'response' field counts as empty
        result = response.json()
        raw_text = (result.get('response') or '').strip()

        # Clean BEFORE the emptiness check: a reply consisting only of
        # quotes cleans down to '' and must not be reported as a success.
        translated_title = self._clean_translation(raw_text) if raw_text else ''

        if not translated_title:
            return _failure('Ollama returned empty translation', start_time)

        return {
            'success': True,
            'translated_title': translated_title,
            'error': None,
            'duration': time.time() - start_time
        }

    except requests.exceptions.Timeout:
        return _failure(
            f'Request timed out after {self.timeout} seconds', start_time
        )
    except requests.exceptions.ConnectionError:
        return _failure(
            f'Cannot connect to Ollama server at {self.base_url}', start_time
        )
    except requests.exceptions.HTTPError as e:
        # e.response can be None when the error was raised without one
        http_response = e.response
        if http_response is not None:
            detail = f'{http_response.status_code} - {http_response.text[:100]}'
        else:
            detail = str(e)
        return _failure(f'HTTP error: {detail}', start_time)
    except Exception as e:
        # Deliberate catch-all: a translation failure is reported to the
        # caller instead of propagating.
        return _failure(f'Unexpected error: {str(e)}', start_time)
|
||||
|
||||
def _build_translation_prompt(self, title, target_language):
|
||||
"""Build prompt for title translation"""
|
||||
prompt = f"""Translate the following German news headline to {target_language}. Provide only the translation without any explanations, quotes, or additional text.
|
||||
|
||||
German headline:
|
||||
{title}
|
||||
|
||||
{target_language} translation:"""
|
||||
|
||||
return prompt
|
||||
|
||||
def _clean_translation(self, translation):
|
||||
"""Clean translation output by removing quotes and extra text"""
|
||||
# Extract first line only
|
||||
translation = translation.split('\n')[0]
|
||||
|
||||
# Remove surrounding quotes (single and double)
|
||||
translation = translation.strip()
|
||||
if (translation.startswith('"') and translation.endswith('"')) or \
|
||||
(translation.startswith("'") and translation.endswith("'")):
|
||||
translation = translation[1:-1]
|
||||
|
||||
# Trim whitespace again after quote removal
|
||||
translation = translation.strip()
|
||||
|
||||
return translation
|
||||
|
||||
def _build_summarization_prompt(self, content, max_words):
|
||||
"""Build prompt for article summarization"""
|
||||
# Truncate content if too long (keep first 5000 words)
|
||||
|
||||
Reference in New Issue
Block a user