Refactor OllamaClient to route LLM calls through a shared _chat_request helper (/api/chat)
@@ -26,6 +26,43 @@ class OllamaClient:
         self.enabled = enabled
         self.timeout = timeout
 
+    def _chat_request(self, messages, options=None):
+        """
+        Helper to make chat requests to Ollama
+
+        Args:
+            messages: List of message dicts [{'role': 'user', 'content': '...'}]
+            options: Optional dict of model parameters
+
+        Returns:
+            str: Generated text content
+        """
+        if options is None:
+            options = {}
+
+        url = f"{self.base_url}/api/chat"
+        headers = {'Content-Type': 'application/json'}
+        if self.api_key:
+            headers['Authorization'] = f'Bearer {self.api_key}'
+
+        payload = {
+            'model': self.model,
+            'messages': messages,
+            'stream': False,
+            'options': options
+        }
+
+        response = requests.post(
+            url,
+            json=payload,
+            headers=headers,
+            timeout=self.timeout
+        )
+        response.raise_for_status()
+
+        result = response.json()
+        return result.get('message', {}).get('content', '').strip()
+
     def summarize_article(self, content, max_words=150):
         """
         Summarize article content using Ollama
@@ -70,37 +107,26 @@ class OllamaClient:
         start_time = time.time()
 
         try:
-            # Construct prompt
-            prompt = self._build_summarization_prompt(content, max_words)
-
-            # Prepare request
-            url = f"{self.base_url}/api/generate"
-            headers = {'Content-Type': 'application/json'}
-            if self.api_key:
-                headers['Authorization'] = f'Bearer {self.api_key}'
-
-            payload = {
-                'model': self.model,
-                'prompt': prompt,
-                'stream': False,
-                'options': {
-                    'temperature': 0.7,
-                    'num_predict': 250  # Limit response length
-                }
-            }
-
-            # Make request
-            response = requests.post(
-                url,
-                json=payload,
-                headers=headers,
-                timeout=self.timeout
-            )
-            response.raise_for_status()
-
-            # Parse response
-            result = response.json()
-            summary = result.get('response', '').strip()
+            # Construct messages for chat API
+            messages = [
+                {
+                    'role': 'system',
+                    'content': f"You are a skilled journalist writing for The New York Times. Summarize the provided article in English in {max_words} words or less.\n\nWrite in the clear, engaging, and authoritative style of New York Times Magazine:\n- Lead with the most newsworthy information\n- Use active voice and vivid language\n- Make it accessible and easy to read\n- Focus on what matters to readers\n- Even if the source is in German or another language, write your summary entirely in English\n\nIMPORTANT: Write in plain text only. Do NOT use markdown formatting (no ##, **, *, bullets, etc.). Just write natural prose."
+                },
+                {
+                    'role': 'user',
+                    'content': f"Summarize this article:\n\n{content}"
+                }
+            ]
+
+            # Make request using chat endpoint
+            summary = self._chat_request(
+                messages,
+                options={
+                    'temperature': 0.5,
+                    'num_predict': 350
+                }
+            )
 
             if not summary:
                 return {
@@ -198,37 +224,26 @@ class OllamaClient:
         start_time = time.time()
 
         try:
-            # Construct prompt
-            prompt = self._build_translation_prompt(title, target_language)
-
-            # Prepare request
-            url = f"{self.base_url}/api/generate"
-            headers = {'Content-Type': 'application/json'}
-            if self.api_key:
-                headers['Authorization'] = f'Bearer {self.api_key}'
-
-            payload = {
-                'model': self.model,
-                'prompt': prompt,
-                'stream': False,
-                'options': {
-                    'temperature': 0.3,  # Lower temperature for consistent translations
-                    'num_predict': 100  # Limit response length for title-length outputs
-                }
-            }
-
-            # Make request
-            response = requests.post(
-                url,
-                json=payload,
-                headers=headers,
-                timeout=self.timeout
-            )
-            response.raise_for_status()
-
-            # Parse response
-            result = response.json()
-            translated_title = result.get('response', '').strip()
+            # Construct messages for chat API
+            messages = [
+                {
+                    'role': 'system',
+                    'content': f"You are a professional translator. Translate the following German news headline to {target_language}.\n\nIMPORTANT: Provide ONLY the {target_language} translation. Do not include explanations, quotes, or any other text. Just the translated headline."
+                },
+                {
+                    'role': 'user',
+                    'content': title
+                }
+            ]
+
+            # Make request using chat endpoint
+            translated_title = self._chat_request(
+                messages,
+                options={
+                    'temperature': 0.1,  # Low temperature for consistent translations
+                    'num_predict': 100  # Limit response length
+                }
+            )
 
             if not translated_title:
                 return {
@@ -241,6 +256,13 @@ class OllamaClient:
             # Clean the translation output
             translated_title = self._clean_translation(translated_title)
 
+            # Validate translation (if it's same as original, it might have failed)
+            if translated_title.lower() == title.lower() and target_language == 'English':
+                # Retry with more forceful prompt
+                messages[0]['content'] += " If the text is already English, just output it as is."
+                translated_title = self._chat_request(messages, options={'temperature': 0.1})
+                translated_title = self._clean_translation(translated_title)
+
             return {
                 'success': True,
                 'translated_title': translated_title,
@@ -277,19 +299,6 @@ class OllamaClient:
                 'duration': time.time() - start_time
             }
 
-    def _build_translation_prompt(self, title, target_language):
-        """Build prompt for title translation"""
-        prompt = f"""Translate the following German news headline to {target_language}.
-
-IMPORTANT: Provide ONLY the {target_language} translation. Do not include explanations, quotes, or any other text. Just the translated headline.
-
-German headline:
-{title}
-
-{target_language} translation:"""
-
-        return prompt
-
     def _clean_translation(self, translation):
         """Clean translation output by removing quotes and extra text"""
         # Extract first line only
@@ -335,31 +344,6 @@ German headline:
 
         return text
 
-    def _build_summarization_prompt(self, content, max_words):
-        """Build prompt for article summarization"""
-        # Truncate content if too long (keep first 5000 words)
-        words = content.split()
-        if len(words) > 5000:
-            content = ' '.join(words[:5000]) + '...'
-
-        prompt = f"""You are a skilled journalist writing for The New York Times. Summarize the following article in English in {max_words} words or less.
-
-Write in the clear, engaging, and authoritative style of New York Times Magazine:
-- Lead with the most newsworthy information
-- Use active voice and vivid language
-- Make it accessible and easy to read
-- Focus on what matters to readers
-- Even if the source is in German or another language, write your summary entirely in English
-
-IMPORTANT: Write in plain text only. Do NOT use markdown formatting (no ##, **, *, bullets, etc.). Just write natural prose.
-
-Article:
-{content}
-
-New York Times-style summary (max {max_words} words):"""
-
-        return prompt
-
     def is_available(self):
         """
         Check if Ollama server is reachable
@@ -462,37 +446,24 @@ New York Times-style summary (max {max_words} words):"""
         start_time = time.time()
 
         try:
-            response = requests.post(
-                f"{self.base_url}/api/generate",
-                json={
-                    "model": self.model,
-                    "prompt": prompt,
-                    "stream": False,
-                    "options": {
-                        "num_predict": max_tokens,
-                        "temperature": 0.1  # Low temperature for consistent answers
-                    }
-                },
-                timeout=self.timeout
-            )
+            messages = [{'role': 'user', 'content': prompt}]
+
+            text = self._chat_request(
+                messages,
+                options={
+                    "num_predict": max_tokens,
+                    "temperature": 0.1
+                }
+            )
 
             duration = time.time() - start_time
 
-            if response.status_code == 200:
-                result = response.json()
-                return {
-                    'text': result.get('response', '').strip(),
-                    'success': True,
-                    'error': None,
-                    'duration': duration
-                }
-            else:
-                return {
-                    'text': '',
-                    'success': False,
-                    'error': f"HTTP {response.status_code}: {response.text}",
-                    'duration': duration
-                }
+            return {
+                'text': text,
+                'success': True,
+                'error': None,
+                'duration': duration
+            }
 
         except requests.exceptions.Timeout:
             return {
@@ -537,47 +508,26 @@ New York Times-style summary (max {max_words} words):"""
         start_time = time.time()
 
         try:
-            # Construct prompt for keyword extraction
-            prompt = f"""Extract {max_keywords} key topics or keywords from this article. These will be used to understand user interests.
-
-Title: {title}
-Summary: {summary}
-
-Return ONLY the keywords separated by commas, nothing else. Focus on:
-- Main topics (e.g., "Bayern Munich", "Oktoberfest", "City Council")
-- Locations (e.g., "Marienplatz", "Airport")
-- Events or themes (e.g., "Transportation", "Housing", "Technology")
-
-Keywords:"""
-
-            # Prepare request
-            url = f"{self.base_url}/api/generate"
-            headers = {'Content-Type': 'application/json'}
-            if self.api_key:
-                headers['Authorization'] = f'Bearer {self.api_key}'
-
-            payload = {
-                'model': self.model,
-                'prompt': prompt,
-                'stream': False,
-                'options': {
-                    'temperature': 0.3,  # Lower temperature for consistent extraction
-                    'num_predict': 100  # Limit response length
-                }
-            }
-
-            # Make request
-            response = requests.post(
-                url,
-                json=payload,
-                headers=headers,
-                timeout=self.timeout
-            )
-            response.raise_for_status()
-
-            # Parse response
-            result = response.json()
-            keywords_text = result.get('response', '').strip()
+            # Construct messages for chat API
+            messages = [
+                {
+                    'role': 'system',
+                    'content': f"Extract {max_keywords} key topics or keywords from the article.\n\nReturn ONLY the keywords separated by commas, nothing else. Focus on:\n- Main topics (e.g., 'Bayern Munich', 'Oktoberfest', 'City Council')\n- Locations (e.g., 'Marienplatz', 'Airport')\n- Events or themes (e.g., 'Transportation', 'Housing', 'Technology')"
+                },
+                {
+                    'role': 'user',
+                    'content': f"Title: {title}\nSummary: {summary}"
+                }
+            ]
+
+            keywords_text = self._chat_request(
+                messages,
+                options={
+                    'temperature': 0.2,
+                    'num_predict': 100
+                }
+            )
 
             if not keywords_text:
                 return {
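A minimal usage sketch of the new _chat_request helper introduced above. The constructor arguments and host shown here are assumptions for illustration; only base_url, model, api_key, timeout and enabled are attributes actually referenced in the diff.

    # Hypothetical setup; constructor signature is assumed, not taken from the diff.
    client = OllamaClient(
        base_url="http://localhost:11434",  # assumed local Ollama address
        model="llama3",                     # assumed model name
        api_key=None,
        timeout=60,
        enabled=True
    )

    messages = [
        {'role': 'system', 'content': 'You are a concise assistant.'},
        {'role': 'user', 'content': 'Say hello in one sentence.'}
    ]

    # _chat_request posts to {base_url}/api/chat with stream=False and returns
    # the stripped message.content string from the JSON response.
    text = client._chat_request(messages, options={'temperature': 0.2, 'num_predict': 50})
    print(text)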