This commit is contained in:
2025-12-10 12:43:18 +00:00
parent 19fabacf5a
commit 57f37c8dc0

View File

@@ -25,6 +25,43 @@ class OllamaClient:
self.api_key = api_key
self.enabled = enabled
self.timeout = timeout
def _chat_request(self, messages, options=None):
    """
    Send a non-streaming chat request to Ollama and return the reply text.

    Args:
        messages: List of message dicts [{'role': 'user', 'content': '...'}]
        options: Optional dict of model parameters; None is sent as {}

    Returns:
        str: Generated text content with surrounding whitespace stripped,
            or '' when the response has no message content

    Raises:
        requests.exceptions.HTTPError: raised by raise_for_status() when the
            server answers with an error status. Other exceptions raised by
            requests.post (e.g. timeouts) are not caught here and propagate
            to the caller.
    """
    headers = {'Content-Type': 'application/json'}
    if self.api_key:
        headers['Authorization'] = f'Bearer {self.api_key}'

    payload = {
        'model': self.model,
        'messages': messages,
        'stream': False,
        'options': options if options is not None else {},
    }

    response = requests.post(
        f"{self.base_url}/api/chat",
        json=payload,
        headers=headers,
        timeout=self.timeout,
    )
    response.raise_for_status()
    result = response.json()

    # dict.get() defaults only apply to *missing* keys; a JSON null for
    # 'message' or 'content' would otherwise crash on .get()/.strip().
    message = result.get('message') or {}
    content = message.get('content') or ''
    return content.strip()
def summarize_article(self, content, max_words=150):
"""
@@ -70,37 +107,26 @@ class OllamaClient:
start_time = time.time()
try:
# Construct prompt
prompt = self._build_summarization_prompt(content, max_words)
# Prepare request
url = f"{self.base_url}/api/generate"
headers = {'Content-Type': 'application/json'}
if self.api_key:
headers['Authorization'] = f'Bearer {self.api_key}'
payload = {
'model': self.model,
'prompt': prompt,
'stream': False,
'options': {
'temperature': 0.7,
'num_predict': 250 # Limit response length
# Construct messages for chat API
messages = [
{
'role': 'system',
'content': f"You are a skilled journalist writing for The New York Times. Summarize the provided article in English in {max_words} words or less.\\n\\nWrite in the clear, engaging, and authoritative style of New York Times Magazine:\\n- Lead with the most newsworthy information\\n- Use active voice and vivid language\\n- Make it accessible and easy to read\\n- Focus on what matters to readers\\n- Even if the source is in German or another language, write your summary entirely in English\\n\\nIMPORTANT: Write in plain text only. Do NOT use markdown formatting (no ##, **, *, bullets, etc.). Just write natural prose."
},
{
'role': 'user',
'content': f"Summarize this article:\\n\\n{content}"
}
}
]
# Make request
response = requests.post(
url,
json=payload,
headers=headers,
timeout=self.timeout
# Make request using chat endpoint
summary = self._chat_request(
messages,
options={
'temperature': 0.5,
'num_predict': 350
}
)
response.raise_for_status()
# Parse response
result = response.json()
summary = result.get('response', '').strip()
if not summary:
return {
@@ -198,37 +224,26 @@ class OllamaClient:
start_time = time.time()
try:
# Construct prompt
prompt = self._build_translation_prompt(title, target_language)
# Prepare request
url = f"{self.base_url}/api/generate"
headers = {'Content-Type': 'application/json'}
if self.api_key:
headers['Authorization'] = f'Bearer {self.api_key}'
payload = {
'model': self.model,
'prompt': prompt,
'stream': False,
'options': {
'temperature': 0.3, # Lower temperature for consistent translations
'num_predict': 100 # Limit response length for title-length outputs
# Construct messages for chat API
messages = [
{
'role': 'system',
'content': f"You are a professional translator. Translate the following German news headline to {target_language}.\\n\\nIMPORTANT: Provide ONLY the {target_language} translation. Do not include explanations, quotes, or any other text. Just the translated headline."
},
{
'role': 'user',
'content': title
}
}
]
# Make request
response = requests.post(
url,
json=payload,
headers=headers,
timeout=self.timeout
# Make request using chat endpoint
translated_title = self._chat_request(
messages,
options={
'temperature': 0.1, # Low temperature for consistent translations
'num_predict': 100 # Limit response length
}
)
response.raise_for_status()
# Parse response
result = response.json()
translated_title = result.get('response', '').strip()
if not translated_title:
return {
@@ -241,6 +256,13 @@ class OllamaClient:
# Clean the translation output
translated_title = self._clean_translation(translated_title)
# Validate translation (if it's same as original, it might have failed)
if translated_title.lower() == title.lower() and target_language == 'English':
# Retry with more forceful prompt
messages[0]['content'] += " If the text is already English, just output it as is."
translated_title = self._chat_request(messages, options={'temperature': 0.1})
translated_title = self._clean_translation(translated_title)
return {
'success': True,
'translated_title': translated_title,
@@ -277,19 +299,6 @@ class OllamaClient:
'duration': time.time() - start_time
}
def _build_translation_prompt(self, title, target_language):
"""Build prompt for title translation"""
prompt = f"""Translate the following German news headline to {target_language}.
IMPORTANT: Provide ONLY the {target_language} translation. Do not include explanations, quotes, or any other text. Just the translated headline.
German headline:
{title}
{target_language} translation:"""
return prompt
def _clean_translation(self, translation):
"""Clean translation output by removing quotes and extra text"""
# Extract first line only
@@ -335,31 +344,6 @@ German headline:
return text
def _build_summarization_prompt(self, content, max_words):
"""Build prompt for article summarization"""
# Truncate content if too long (keep first 5000 words)
words = content.split()
if len(words) > 5000:
content = ' '.join(words[:5000]) + '...'
prompt = f"""You are a skilled journalist writing for The New York Times. Summarize the following article in English in {max_words} words or less.
Write in the clear, engaging, and authoritative style of New York Times Magazine:
- Lead with the most newsworthy information
- Use active voice and vivid language
- Make it accessible and easy to read
- Focus on what matters to readers
- Even if the source is in German or another language, write your summary entirely in English
IMPORTANT: Write in plain text only. Do NOT use markdown formatting (no ##, **, *, bullets, etc.). Just write natural prose.
Article:
{content}
New York Times-style summary (max {max_words} words):"""
return prompt
def is_available(self):
"""
Check if Ollama server is reachable
@@ -462,37 +446,24 @@ New York Times-style summary (max {max_words} words):"""
start_time = time.time()
try:
response = requests.post(
f"{self.base_url}/api/generate",
json={
"model": self.model,
"prompt": prompt,
"stream": False,
"options": {
"num_predict": max_tokens,
"temperature": 0.1 # Low temperature for consistent answers
}
},
timeout=self.timeout
messages = [{'role': 'user', 'content': prompt}]
text = self._chat_request(
messages,
options={
"num_predict": max_tokens,
"temperature": 0.1
}
)
duration = time.time() - start_time
if response.status_code == 200:
result = response.json()
return {
'text': result.get('response', '').strip(),
'success': True,
'error': None,
'duration': duration
}
else:
return {
'text': '',
'success': False,
'error': f"HTTP {response.status_code}: {response.text}",
'duration': duration
}
return {
'text': text,
'success': True,
'error': None,
'duration': duration
}
except requests.exceptions.Timeout:
return {
@@ -537,47 +508,26 @@ New York Times-style summary (max {max_words} words):"""
start_time = time.time()
try:
# Construct prompt for keyword extraction
prompt = f"""Extract {max_keywords} key topics or keywords from this article. These will be used to understand user interests.
Title: {title}
Summary: {summary}
Return ONLY the keywords separated by commas, nothing else. Focus on:
- Main topics (e.g., "Bayern Munich", "Oktoberfest", "City Council")
- Locations (e.g., "Marienplatz", "Airport")
- Events or themes (e.g., "Transportation", "Housing", "Technology")
Keywords:"""
# Prepare request
url = f"{self.base_url}/api/generate"
headers = {'Content-Type': 'application/json'}
if self.api_key:
headers['Authorization'] = f'Bearer {self.api_key}'
payload = {
'model': self.model,
'prompt': prompt,
'stream': False,
'options': {
'temperature': 0.3, # Lower temperature for consistent extraction
'num_predict': 100 # Limit response length
# Construct messages for chat API
messages = [
{
'role': 'system',
'content': f"Extract {max_keywords} key topics or keywords from the article.\\n\\nReturn ONLY the keywords separated by commas, nothing else. Focus on:\\n- Main topics (e.g., 'Bayern Munich', 'Oktoberfest', 'City Council')\\n- Locations (e.g., 'Marienplatz', 'Airport')\\n- Events or themes (e.g., 'Transportation', 'Housing', 'Technology')"
},
{
'role': 'user',
'content': f"Title: {title}\\nSummary: {summary}"
}
}
]
# Make request
response = requests.post(
url,
json=payload,
headers=headers,
timeout=self.timeout
keywords_text = self._chat_request(
messages,
options={
'temperature': 0.2,
'num_predict': 100
}
)
response.raise_for_status()
# Parse response
result = response.json()
keywords_text = result.get('response', '').strip()
if not keywords_text:
return {