This commit is contained in:
2025-12-10 12:43:18 +00:00
parent 19fabacf5a
commit 57f37c8dc0

View File

@@ -26,6 +26,43 @@ class OllamaClient:
self.enabled = enabled self.enabled = enabled
self.timeout = timeout self.timeout = timeout
def _chat_request(self, messages, options=None):
    """
    Send a non-streaming chat request to Ollama and return the reply text.

    Args:
        messages: List of message dicts [{'role': 'user', 'content': '...'}]
        options: Optional dict of model parameters, sent as the request's
            'options' field (an empty dict is sent when omitted)

    Returns:
        str: Generated text content with surrounding whitespace stripped,
            or '' when the response carries no message content

    Raises:
        requests.exceptions.RequestException: Propagated from the POST
            itself or from raise_for_status() on an HTTP error status.
    """
    if options is None:
        options = {}

    url = f"{self.base_url}/api/chat"
    headers = {'Content-Type': 'application/json'}
    # Only attach credentials when an API key is configured
    if self.api_key:
        headers['Authorization'] = f'Bearer {self.api_key}'

    payload = {
        'model': self.model,
        'messages': messages,
        'stream': False,
        'options': options
    }

    response = requests.post(
        url,
        json=payload,
        headers=headers,
        timeout=self.timeout
    )
    response.raise_for_status()

    result = response.json()
    # dict.get(key, default) only falls back when the key is ABSENT; a JSON
    # null for 'message' or 'content' would otherwise raise AttributeError.
    # Callers treat an empty string as "no output", so normalise to ''.
    message = result.get('message') or {}
    content = message.get('content') or ''
    return content.strip()
def summarize_article(self, content, max_words=150): def summarize_article(self, content, max_words=150):
""" """
Summarize article content using Ollama Summarize article content using Ollama
@@ -70,37 +107,26 @@ class OllamaClient:
start_time = time.time() start_time = time.time()
try: try:
# Construct prompt # Construct messages for chat API
prompt = self._build_summarization_prompt(content, max_words) messages = [
{
# Prepare request 'role': 'system',
url = f"{self.base_url}/api/generate" 'content': f"You are a skilled journalist writing for The New York Times. Summarize the provided article in English in {max_words} words or less.\\n\\nWrite in the clear, engaging, and authoritative style of New York Times Magazine:\\n- Lead with the most newsworthy information\\n- Use active voice and vivid language\\n- Make it accessible and easy to read\\n- Focus on what matters to readers\\n- Even if the source is in German or another language, write your summary entirely in English\\n\\nIMPORTANT: Write in plain text only. Do NOT use markdown formatting (no ##, **, *, bullets, etc.). Just write natural prose."
headers = {'Content-Type': 'application/json'} },
if self.api_key: {
headers['Authorization'] = f'Bearer {self.api_key}' 'role': 'user',
'content': f"Summarize this article:\\n\\n{content}"
payload = {
'model': self.model,
'prompt': prompt,
'stream': False,
'options': {
'temperature': 0.7,
'num_predict': 250 # Limit response length
} }
} ]
# Make request # Make request using chat endpoint
response = requests.post( summary = self._chat_request(
url, messages,
json=payload, options={
headers=headers, 'temperature': 0.5,
timeout=self.timeout 'num_predict': 350
}
) )
response.raise_for_status()
# Parse response
result = response.json()
summary = result.get('response', '').strip()
if not summary: if not summary:
return { return {
@@ -198,37 +224,26 @@ class OllamaClient:
start_time = time.time() start_time = time.time()
try: try:
# Construct prompt # Construct messages for chat API
prompt = self._build_translation_prompt(title, target_language) messages = [
{
# Prepare request 'role': 'system',
url = f"{self.base_url}/api/generate" 'content': f"You are a professional translator. Translate the following German news headline to {target_language}.\\n\\nIMPORTANT: Provide ONLY the {target_language} translation. Do not include explanations, quotes, or any other text. Just the translated headline."
headers = {'Content-Type': 'application/json'} },
if self.api_key: {
headers['Authorization'] = f'Bearer {self.api_key}' 'role': 'user',
'content': title
payload = {
'model': self.model,
'prompt': prompt,
'stream': False,
'options': {
'temperature': 0.3, # Lower temperature for consistent translations
'num_predict': 100 # Limit response length for title-length outputs
} }
} ]
# Make request # Make request using chat endpoint
response = requests.post( translated_title = self._chat_request(
url, messages,
json=payload, options={
headers=headers, 'temperature': 0.1, # Low temperature for consistent translations
timeout=self.timeout 'num_predict': 100 # Limit response length
}
) )
response.raise_for_status()
# Parse response
result = response.json()
translated_title = result.get('response', '').strip()
if not translated_title: if not translated_title:
return { return {
@@ -241,6 +256,13 @@ class OllamaClient:
# Clean the translation output # Clean the translation output
translated_title = self._clean_translation(translated_title) translated_title = self._clean_translation(translated_title)
# Validate translation (if it's same as original, it might have failed)
if translated_title.lower() == title.lower() and target_language == 'English':
# Retry with more forceful prompt
messages[0]['content'] += " If the text is already English, just output it as is."
translated_title = self._chat_request(messages, options={'temperature': 0.1})
translated_title = self._clean_translation(translated_title)
return { return {
'success': True, 'success': True,
'translated_title': translated_title, 'translated_title': translated_title,
@@ -277,19 +299,6 @@ class OllamaClient:
'duration': time.time() - start_time 'duration': time.time() - start_time
} }
def _build_translation_prompt(self, title, target_language):
"""Build prompt for title translation"""
prompt = f"""Translate the following German news headline to {target_language}.
IMPORTANT: Provide ONLY the {target_language} translation. Do not include explanations, quotes, or any other text. Just the translated headline.
German headline:
{title}
{target_language} translation:"""
return prompt
def _clean_translation(self, translation): def _clean_translation(self, translation):
"""Clean translation output by removing quotes and extra text""" """Clean translation output by removing quotes and extra text"""
# Extract first line only # Extract first line only
@@ -335,31 +344,6 @@ German headline:
return text return text
def _build_summarization_prompt(self, content, max_words):
"""Build prompt for article summarization"""
# Truncate content if too long (keep first 5000 words)
words = content.split()
if len(words) > 5000:
content = ' '.join(words[:5000]) + '...'
prompt = f"""You are a skilled journalist writing for The New York Times. Summarize the following article in English in {max_words} words or less.
Write in the clear, engaging, and authoritative style of New York Times Magazine:
- Lead with the most newsworthy information
- Use active voice and vivid language
- Make it accessible and easy to read
- Focus on what matters to readers
- Even if the source is in German or another language, write your summary entirely in English
IMPORTANT: Write in plain text only. Do NOT use markdown formatting (no ##, **, *, bullets, etc.). Just write natural prose.
Article:
{content}
New York Times-style summary (max {max_words} words):"""
return prompt
def is_available(self): def is_available(self):
""" """
Check if Ollama server is reachable Check if Ollama server is reachable
@@ -462,37 +446,24 @@ New York Times-style summary (max {max_words} words):"""
start_time = time.time() start_time = time.time()
try: try:
response = requests.post( messages = [{'role': 'user', 'content': prompt}]
f"{self.base_url}/api/generate",
json={ text = self._chat_request(
"model": self.model, messages,
"prompt": prompt, options={
"stream": False, "num_predict": max_tokens,
"options": { "temperature": 0.1
"num_predict": max_tokens, }
"temperature": 0.1 # Low temperature for consistent answers
}
},
timeout=self.timeout
) )
duration = time.time() - start_time duration = time.time() - start_time
if response.status_code == 200: return {
result = response.json() 'text': text,
return { 'success': True,
'text': result.get('response', '').strip(), 'error': None,
'success': True, 'duration': duration
'error': None, }
'duration': duration
}
else:
return {
'text': '',
'success': False,
'error': f"HTTP {response.status_code}: {response.text}",
'duration': duration
}
except requests.exceptions.Timeout: except requests.exceptions.Timeout:
return { return {
@@ -537,47 +508,26 @@ New York Times-style summary (max {max_words} words):"""
start_time = time.time() start_time = time.time()
try: try:
# Construct prompt for keyword extraction # Construct messages for chat API
prompt = f"""Extract {max_keywords} key topics or keywords from this article. These will be used to understand user interests. messages = [
{
Title: {title} 'role': 'system',
Summary: {summary} 'content': f"Extract {max_keywords} key topics or keywords from the article.\\n\\nReturn ONLY the keywords separated by commas, nothing else. Focus on:\\n- Main topics (e.g., 'Bayern Munich', 'Oktoberfest', 'City Council')\\n- Locations (e.g., 'Marienplatz', 'Airport')\\n- Events or themes (e.g., 'Transportation', 'Housing', 'Technology')"
},
Return ONLY the keywords separated by commas, nothing else. Focus on: {
- Main topics (e.g., "Bayern Munich", "Oktoberfest", "City Council") 'role': 'user',
- Locations (e.g., "Marienplatz", "Airport") 'content': f"Title: {title}\\nSummary: {summary}"
- Events or themes (e.g., "Transportation", "Housing", "Technology")
Keywords:"""
# Prepare request
url = f"{self.base_url}/api/generate"
headers = {'Content-Type': 'application/json'}
if self.api_key:
headers['Authorization'] = f'Bearer {self.api_key}'
payload = {
'model': self.model,
'prompt': prompt,
'stream': False,
'options': {
'temperature': 0.3, # Lower temperature for consistent extraction
'num_predict': 100 # Limit response length
} }
} ]
# Make request # Make request
response = requests.post( keywords_text = self._chat_request(
url, messages,
json=payload, options={
headers=headers, 'temperature': 0.2,
timeout=self.timeout 'num_predict': 100
}
) )
response.raise_for_status()
# Parse response
result = response.json()
keywords_text = result.get('response', '').strip()
if not keywords_text: if not keywords_text:
return { return {