update
This commit is contained in:
@@ -508,6 +508,110 @@ New York Times-style summary (max {max_words} words):"""
|
||||
'error': str(e),
|
||||
'duration': time.time() - start_time
|
||||
}
|
||||
|
||||
def extract_keywords(self, title, summary, max_keywords=5):
    """
    Extract keywords/topics from an article for personalization.

    Builds a prompt from the title and summary, POSTs it to
    ``{self.base_url}/api/generate`` (non-streaming) and parses the
    text in the reply's ``response`` field into a list of keywords.
    This method never raises for request or parsing failures; every
    failure is reported through the returned dict.

    Args:
        title: Article title
        summary: Article summary
        max_keywords: Maximum number of keywords to extract (default 5).
            Values <= 0 yield an empty keyword list.

    Returns:
        {
            'keywords': list,      # List of extracted keywords
            'success': bool,       # Whether extraction succeeded
            'error': str or None,  # Error message if failed
            'duration': float      # Time taken in seconds
        }
    """
    if not self.enabled:
        return {
            'keywords': [],
            'success': False,
            'error': 'Ollama is disabled',
            'duration': 0
        }

    start_time = time.time()

    try:
        # Construct prompt for keyword extraction
        prompt = f"""Extract {max_keywords} key topics or keywords from this article. These will be used to understand user interests.

Title: {title}
Summary: {summary}

Return ONLY the keywords separated by commas, nothing else. Focus on:
- Main topics (e.g., "Bayern Munich", "Oktoberfest", "City Council")
- Locations (e.g., "Marienplatz", "Airport")
- Events or themes (e.g., "Transportation", "Housing", "Technology")

Keywords:"""

        # Prepare request
        url = f"{self.base_url}/api/generate"
        headers = {'Content-Type': 'application/json'}
        if self.api_key:
            headers['Authorization'] = f'Bearer {self.api_key}'

        payload = {
            'model': self.model,
            'prompt': prompt,
            'stream': False,
            'options': {
                'temperature': 0.3,  # Lower temperature for consistent extraction
                'num_predict': 100   # Limit response length
            }
        }

        # Make request
        response = requests.post(
            url,
            json=payload,
            headers=headers,
            timeout=self.timeout
        )
        response.raise_for_status()

        # Parse response; `or ''` also covers an explicit JSON null, which
        # would otherwise fail on .strip() and hide the real cause.
        result = response.json()
        keywords_text = (result.get('response') or '').strip()

        if not keywords_text:
            return {
                'keywords': [],
                'success': False,
                'error': 'Ollama returned empty response',
                'duration': time.time() - start_time
            }

        # Parse keywords from response. The model does not always obey the
        # "comma separated" instruction: it may answer one keyword per line,
        # as a bullet list, or echo the double quotes used in the prompt's
        # examples. Treat newlines as separators and strip that decoration.
        keywords = []
        seen = set()
        for raw in keywords_text.replace('\n', ',').split(','):
            if len(keywords) >= max_keywords:
                break
            keyword = raw.strip().lstrip('-*\u2022 ').strip('"\'` ')
            # Very short fragments (1-2 chars) are dropped as noise, as before.
            if len(keyword) <= 2:
                continue
            # De-duplicate case-insensitively, keeping the first spelling.
            marker = keyword.casefold()
            if marker in seen:
                continue
            seen.add(marker)
            keywords.append(keyword)

        return {
            'keywords': keywords,
            'success': True,
            'error': None,
            'duration': time.time() - start_time
        }

    except requests.exceptions.Timeout:
        return {
            'keywords': [],
            'success': False,
            'error': f"Request timed out after {self.timeout}s",
            'duration': time.time() - start_time
        }
    except Exception as e:
        # Deliberate catch-all: callers rely on the result dict rather
        # than on exceptions (HTTP errors, bad JSON, connection failures).
        return {
            'keywords': [],
            'success': False,
            'error': str(e),
            'duration': time.time() - start_time
        }
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
Reference in New Issue
Block a user