This commit is contained in:
2025-11-18 14:45:41 +01:00
parent 2e80d64ff6
commit 84fce9a82c
19 changed files with 2437 additions and 3 deletions

View File

@@ -508,6 +508,110 @@ New York Times-style summary (max {max_words} words):"""
'error': str(e),
'duration': time.time() - start_time
}
def extract_keywords(self, title, summary, max_keywords=5):
    """
    Extract keywords/topics from an article for personalization.

    Sends the title and summary to the Ollama ``/api/generate`` endpoint and
    parses the model's reply into a list of keywords. Failures are never
    raised; they are reported through the returned dict.

    Args:
        title: Article title
        summary: Article summary
        max_keywords: Maximum number of keywords to extract (default 5).
            A zero or negative integer yields an empty list without
            contacting the server.

    Returns:
        {
            'keywords': list,      # List of extracted keywords
            'success': bool,       # Whether extraction succeeded
            'error': str or None,  # Error message if failed
            'duration': float      # Time taken in seconds
        }
    """
    if not self.enabled:
        return {
            'keywords': [],
            'success': False,
            'error': 'Ollama is disabled',
            'duration': 0
        }

    # A non-positive limit can never produce keywords. Answer immediately
    # instead of paying for a model call, and avoid the negative-slice
    # surprise (keywords[:-1] would silently drop the last keyword).
    if isinstance(max_keywords, int) and max_keywords < 1:
        return {
            'keywords': [],
            'success': True,
            'error': None,
            'duration': 0
        }

    start_time = time.time()
    try:
        # Construct prompt for keyword extraction
        prompt = f"""Extract {max_keywords} key topics or keywords from this article. These will be used to understand user interests.
Title: {title}
Summary: {summary}
Return ONLY the keywords separated by commas, nothing else. Focus on:
- Main topics (e.g., "Bayern Munich", "Oktoberfest", "City Council")
- Locations (e.g., "Marienplatz", "Airport")
- Events or themes (e.g., "Transportation", "Housing", "Technology")
Keywords:"""

        # Prepare request
        url = f"{self.base_url}/api/generate"
        headers = {'Content-Type': 'application/json'}
        if self.api_key:
            headers['Authorization'] = f'Bearer {self.api_key}'
        payload = {
            'model': self.model,
            'prompt': prompt,
            'stream': False,
            'options': {
                'temperature': 0.3,  # Lower temperature for consistent extraction
                'num_predict': 100   # Limit response length
            }
        }

        # Make request
        response = requests.post(
            url,
            json=payload,
            headers=headers,
            timeout=self.timeout
        )
        response.raise_for_status()

        # Parse response. `or ''` also covers an explicit null value, which
        # would otherwise fail on .strip() with an unhelpful message.
        result = response.json()
        keywords_text = (result.get('response') or '').strip()
        if not keywords_text:
            return {
                'keywords': [],
                'success': False,
                'error': 'Ollama returned empty response',
                'duration': time.time() - start_time
            }

        # Parse keywords from response. Models do not always honour the
        # "separated by commas" instruction, so line breaks are treated as
        # separators too, and list bullets / surrounding quotes are removed.
        keywords = []
        seen = set()
        for candidate in keywords_text.replace('\n', ',').split(','):
            keyword = candidate.strip().lstrip('-*•').strip().strip('"\'').strip()
            # Very short fragments (1-2 characters) are discarded as noise.
            if len(keyword) <= 2:
                continue
            # Case-insensitive de-duplication; the first spelling wins.
            folded = keyword.casefold()
            if folded in seen:
                continue
            seen.add(folded)
            keywords.append(keyword)
        keywords = keywords[:max_keywords]

        return {
            'keywords': keywords,
            'success': True,
            'error': None,
            'duration': time.time() - start_time
        }
    except requests.exceptions.Timeout:
        return {
            'keywords': [],
            'success': False,
            'error': f"Request timed out after {self.timeout}s",
            'duration': time.time() - start_time
        }
    except Exception as e:
        # Boundary handler: any other failure (HTTP error, connection
        # problem, malformed JSON, ...) is reported in the result dict
        # rather than propagated to the caller.
        return {
            'keywords': [],
            'success': False,
            'error': str(e),
            'duration': time.time() - start_time
        }
if __name__ == '__main__':