432 lines
15 KiB
Python
432 lines
15 KiB
Python
"""
|
|
Ollama client for AI-powered article summarization and title translation
|
|
"""
|
|
import requests
|
|
import time
|
|
from datetime import datetime
|
|
|
|
|
|
class OllamaClient:
    """Client for communicating with an Ollama server.

    Uses the ``/api/generate`` endpoint for article summarization and title
    translation, and the ``/api/tags`` endpoint for availability checks.
    Request failures (timeouts, connection problems, HTTP errors, malformed
    replies) are reported through the ``error`` entry of the returned dict
    instead of being raised.
    """

    # Timeout in seconds for the lightweight /api/tags probes; these do not
    # use ``self.timeout``.
    _PROBE_TIMEOUT = 5

    def __init__(self, base_url, model, api_key=None, enabled=True, timeout=30):
        """
        Initialize Ollama client

        Args:
            base_url: Ollama server URL (e.g., http://localhost:11434).
                A trailing slash is removed; ``None`` is stored as ''.
            model: Model name to use (e.g., phi3:latest)
            api_key: Optional API key, sent as a Bearer token when set
            enabled: Whether Ollama is enabled
            timeout: Request timeout in seconds for generation calls (default 30)
        """
        # A disabled client may be built from unset configuration, so a
        # missing URL must not crash the constructor.
        self.base_url = (base_url or '').rstrip('/')
        self.model = model
        self.api_key = api_key
        self.enabled = enabled
        self.timeout = timeout

    # ------------------------------------------------------------------
    # Shared request helpers
    # ------------------------------------------------------------------

    def _request_headers(self, json_body=False):
        """Build request headers, adding the Bearer token when an API key is set."""
        headers = {'Content-Type': 'application/json'} if json_body else {}
        if self.api_key:
            headers['Authorization'] = f'Bearer {self.api_key}'
        return headers

    def _generate(self, prompt, temperature, num_predict):
        """POST a non-streaming prompt to /api/generate and return the stripped reply text.

        Raises whatever ``requests`` raises (including HTTPError from
        ``raise_for_status``); callers translate those into error messages.
        """
        payload = {
            'model': self.model,
            'prompt': prompt,
            'stream': False,
            'options': {
                'temperature': temperature,
                'num_predict': num_predict
            }
        }
        response = requests.post(
            f"{self.base_url}/api/generate",
            json=payload,
            headers=self._request_headers(json_body=True),
            timeout=self.timeout
        )
        response.raise_for_status()
        # ``or ''`` also covers a JSON null in the 'response' field.
        return (response.json().get('response') or '').strip()

    def _get_tags(self):
        """GET /api/tags and return the response after checking the HTTP status."""
        response = requests.get(
            f"{self.base_url}/api/tags",
            headers=self._request_headers(),
            timeout=self._PROBE_TIMEOUT
        )
        response.raise_for_status()
        return response

    def _describe_error(self, error):
        """Translate an exception from a generation request into an error message."""
        request_errors = requests.exceptions
        # Timeout is checked first: a connect timeout is both a Timeout and
        # a ConnectionError and must be reported as a timeout.
        if isinstance(error, request_errors.Timeout):
            return f'Request timed out after {self.timeout} seconds'
        if isinstance(error, request_errors.ConnectionError):
            return f'Cannot connect to Ollama server at {self.base_url}'
        if isinstance(error, request_errors.HTTPError):
            response = error.response
            if response is None:
                return f'HTTP error: {error}'
            return f'HTTP error: {response.status_code} - {response.text[:100]}'
        return f'Unexpected error: {str(error)}'

    @staticmethod
    def _summary_result(summary=None, original_word_count=0, error=None, duration=0):
        """Assemble the result dict returned by summarize_article."""
        return {
            'summary': summary,
            'summary_word_count': len(summary.split()) if summary else 0,
            'original_word_count': original_word_count,
            'success': error is None,
            'error': error,
            'duration': duration
        }

    @staticmethod
    def _translation_result(translated_title=None, error=None, duration=0):
        """Assemble the result dict returned by translate_title."""
        return {
            'success': error is None,
            'translated_title': translated_title,
            'error': error,
            'duration': duration
        }

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def summarize_article(self, content, max_words=150):
        """
        Summarize article content using Ollama

        Args:
            content: Full article text
            max_words: Maximum words requested in the prompt (default 150)

        Returns:
            {
                'summary': str or None,        # AI-generated summary
                'summary_word_count': int,     # Summary word count
                'original_word_count': int,    # Original article word count
                'success': bool,               # Whether summarization succeeded
                'error': str or None,          # Error message if failed
                'duration': float              # Seconds elapsed (0 if no request was made)
            }
        """
        if not self.enabled:
            return self._summary_result(error='Ollama is not enabled')
        if not content or not content.strip():
            return self._summary_result(error='Content is empty')

        original_word_count = len(content.split())
        # perf_counter is monotonic, so the duration cannot go negative when
        # the wall clock is adjusted mid-request.
        started = time.perf_counter()

        try:
            prompt = self._build_summarization_prompt(content, max_words)
            # NOTE: the response length cap is fixed at 250 tokens and does
            # not scale with max_words.
            summary = self._generate(prompt, temperature=0.7, num_predict=250)
            error = None if summary else 'Ollama returned empty summary'
        except Exception as exc:
            summary = None
            error = self._describe_error(exc)

        return self._summary_result(
            summary=summary if error is None else None,
            original_word_count=original_word_count,
            error=error,
            duration=time.perf_counter() - started
        )

    def translate_title(self, title, target_language='English'):
        """
        Translate article title to target language

        Args:
            title: Original title (the prompt describes it as a German headline)
            target_language: Target language (default: 'English')

        Returns:
            {
                'success': bool,                  # Whether translation succeeded
                'translated_title': str or None,  # Translated title
                'error': str or None,             # Error message if failed
                'duration': float                 # Seconds elapsed (0 if no request was made)
            }
        """
        if not self.enabled:
            return self._translation_result(error='Ollama is not enabled')
        if not title or not title.strip():
            return self._translation_result(error='Title is empty')

        started = time.perf_counter()

        try:
            prompt = self._build_translation_prompt(title, target_language)
            # Lower temperature for consistent translations; short cap
            # because the output is title-length.
            raw_reply = self._generate(prompt, temperature=0.3, num_predict=100)
            # Check for emptiness AFTER cleaning: a reply made only of
            # quotes cleans down to '' and must not count as a success.
            translated_title = self._clean_translation(raw_reply)
            error = None if translated_title else 'Ollama returned empty translation'
        except Exception as exc:
            translated_title = None
            error = self._describe_error(exc)

        return self._translation_result(
            translated_title=translated_title if error is None else None,
            error=error,
            duration=time.perf_counter() - started
        )

    def _build_translation_prompt(self, title, target_language):
        """Build prompt for title translation"""
        return (
            f"Translate the following German news headline to {target_language}. "
            "Provide only the translation without any explanations, quotes, "
            "or additional text.\n"
            "\n"
            "German headline:\n"
            f"{title}\n"
            "\n"
            f"{target_language} translation:"
        )

    def _clean_translation(self, translation):
        """Clean translation output: keep the first line and drop one layer of surrounding quotes."""
        # Strip before splitting so a leading blank line does not produce
        # an empty result.
        first_line = (translation or '').strip().split('\n', 1)[0].strip()

        # Remove surrounding quotes (double or single), one layer only.
        for quote in ('"', "'"):
            if first_line.startswith(quote) and first_line.endswith(quote):
                first_line = first_line[1:-1]
                break

        # Trim whitespace again after quote removal.
        return first_line.strip()

    def _build_summarization_prompt(self, content, max_words):
        """Build prompt for article summarization"""
        # Truncate content if too long (keep first 5000 words).
        words = content.split()
        if len(words) > 5000:
            content = ' '.join(words[:5000]) + '...'

        return (
            f"Summarize the following article in English in {max_words} words or less. "
            "Even if the article is in German or another language, provide the "
            "summary in English. Focus on the key points, main message, and "
            "important details. Be concise and clear.\n"
            "\n"
            "Article:\n"
            f"{content}\n"
            "\n"
            f"English Summary (max {max_words} words):"
        )

    def is_available(self):
        """
        Check if Ollama server is reachable

        Returns:
            bool: True if /api/tags answers with a successful status,
            False otherwise (including when the client is disabled)
        """
        if not self.enabled:
            return False

        try:
            self._get_tags()
        except Exception:
            # Best-effort probe: any request failure means "not available".
            # KeyboardInterrupt/SystemExit are deliberately not swallowed.
            return False
        return True

    def test_connection(self):
        """
        Test connection and return server info

        Returns:
            {
                'available': bool,
                'models': list,          # Model names reported by /api/tags
                'current_model': str,    # The model this client is configured with
                'error': str or None
            }
        """
        info = {
            'available': False,
            'models': [],
            'current_model': self.model,
            'error': None
        }

        if not self.enabled:
            info['error'] = 'Ollama is not enabled'
            return info

        try:
            tags = self._get_tags().json()
            # ``or []`` also covers a JSON null in the 'models' field.
            models = [entry.get('name', '') for entry in tags.get('models') or []]
        except requests.exceptions.ConnectionError:
            info['error'] = f'Cannot connect to Ollama server at {self.base_url}'
        except Exception as exc:
            info['error'] = str(exc)
        else:
            info['available'] = True
            info['models'] = models

        return info
|
|
|
|
|
|
if __name__ == '__main__':
    # Manual smoke test: probes the configured Ollama server and, if it
    # answers, summarizes a short sample article. Requires python-dotenv
    # and a reachable server; nothing here runs on import.
    import os

    from dotenv import load_dotenv

    # The path is relative to the current working directory.
    load_dotenv(dotenv_path='../.env')

    client = OllamaClient(
        base_url=os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434'),
        model=os.getenv('OLLAMA_MODEL', 'phi3:latest'),
        enabled=True
    )

    print("Testing Ollama connection...")
    result = client.test_connection()
    print(f"Available: {result['available']}")
    print(f"Models: {result['models']}")
    print(f"Current model: {result['current_model']}")
    if result['error']:
        # Surface the reason; otherwise a failed probe only shows "Available: False".
        print(f"Error: {result['error']}")

    if result['available']:
        print("\nTesting summarization...")
        test_content = """
        The new U-Bahn line connecting Munich's city center with the airport opened today.
        Mayor Dieter Reiter attended the opening ceremony along with hundreds of residents.
        The line will significantly reduce travel time between the airport and downtown Munich.
        Construction took five years and cost approximately 2 billion euros.
        The new line includes 10 stations and runs every 10 minutes during peak hours.
        """

        summary_result = client.summarize_article(test_content, max_words=50)
        print(f"Success: {summary_result['success']}")
        if summary_result['success']:
            print(f"Summary: {summary_result['summary']}")
            print(f"Original word count: {summary_result['original_word_count']}")
            print(f"Summary word count: {summary_result['summary_word_count']}")
            print(f"Compression: {summary_result['original_word_count'] / max(summary_result['summary_word_count'], 1):.1f}x")
        else:
            # A compression ratio is meaningless without a summary; show why it failed instead.
            print(f"Error: {summary_result['error']}")
        print(f"Duration: {summary_result['duration']:.2f}s")
|