from flask import Blueprint, jsonify
from config import Config
from services.ollama_service import call_ollama, list_ollama_models
import os

ollama_bp = Blueprint('ollama', __name__)


@ollama_bp.route('/api/ollama/ping', methods=['GET', 'POST'])
def ping_ollama():
    """Test connection to Ollama server"""
    try:
        # Check if Ollama is enabled
        if not Config.OLLAMA_ENABLED:
            return jsonify({
                'status': 'disabled',
                'message': 'Ollama is not enabled. Set OLLAMA_ENABLED=true in your .env file.',
                'ollama_config': {
                    'base_url': Config.OLLAMA_BASE_URL,
                    'model': Config.OLLAMA_MODEL,
                    'enabled': False
                }
            }), 200

        # Send a simple test prompt
        test_prompt = "Say 'Hello! I am connected and working.' in one sentence."
        system_prompt = "You are a helpful assistant. Respond briefly and concisely."

        response_text, error_message = call_ollama(test_prompt, system_prompt)

        if response_text:
            return jsonify({
                'status': 'success',
                'message': 'Successfully connected to Ollama',
                'response': response_text,
                'ollama_config': {
                    'base_url': Config.OLLAMA_BASE_URL,
                    'model': Config.OLLAMA_MODEL,
                    'enabled': True
                }
            }), 200
        else:
            # Try to get available models for a better error message
            available_models, _ = list_ollama_models()

            troubleshooting = {
                'check_server': f'Verify Ollama is running at {Config.OLLAMA_BASE_URL}',
                'check_model': f'Verify model "{Config.OLLAMA_MODEL}" is available (run: ollama list)',
                'test_connection': f'Test manually: curl {Config.OLLAMA_BASE_URL}/api/generate -d \'{{"model":"{Config.OLLAMA_MODEL}","prompt":"test"}}\''
            }

            if available_models:
                troubleshooting['available_models'] = available_models
                troubleshooting['suggestion'] = f'Try setting OLLAMA_MODEL to one of: {", ".join(available_models[:5])}'

            return jsonify({
                'status': 'error',
                'message': error_message or 'Failed to get response from Ollama',
                'error_details': error_message,
                'ollama_config': {
                    'base_url': Config.OLLAMA_BASE_URL,
                    'model': Config.OLLAMA_MODEL,
                    'enabled': True
                },
                'troubleshooting': troubleshooting
            }), 500

    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': f'Error connecting to Ollama: {str(e)}',
            'ollama_config': {
                'base_url': Config.OLLAMA_BASE_URL,
                'model': Config.OLLAMA_MODEL,
                'enabled': Config.OLLAMA_ENABLED
            }
        }), 500


@ollama_bp.route('/api/ollama/config', methods=['GET'])
def get_ollama_config():
    """Get current Ollama configuration (for debugging)"""
    try:
        from pathlib import Path
        backend_dir = Path(__file__).parent.parent
        env_path = backend_dir / '.env'

        return jsonify({
            'ollama_config': {
                'base_url': Config.OLLAMA_BASE_URL,
                'model': Config.OLLAMA_MODEL,
                'enabled': Config.OLLAMA_ENABLED,
                'has_api_key': bool(Config.OLLAMA_API_KEY)
            },
            'env_file_path': str(env_path),
            'env_file_exists': env_path.exists(),
            'current_working_directory': os.getcwd()
        }), 200
    except Exception as e:
        return jsonify({
            'error': str(e),
            'ollama_config': {
                'base_url': Config.OLLAMA_BASE_URL,
                'model': Config.OLLAMA_MODEL,
                'enabled': Config.OLLAMA_ENABLED
            }
        }), 500


@ollama_bp.route('/api/ollama/models', methods=['GET'])
def get_ollama_models():
    """List available models on Ollama server"""
    try:
        if not Config.OLLAMA_ENABLED:
            return jsonify({
                'status': 'disabled',
                'message': 'Ollama is not enabled. Set OLLAMA_ENABLED=true in your .env file.',
                'ollama_config': {
                    'base_url': Config.OLLAMA_BASE_URL,
                    'model': Config.OLLAMA_MODEL,
                    'enabled': False
                }
            }), 200

        models, error_message = list_ollama_models()

        if models is not None:
            return jsonify({
                'status': 'success',
                'models': models,
                'current_model': Config.OLLAMA_MODEL,
                'ollama_config': {
                    'base_url': Config.OLLAMA_BASE_URL,
                    'model': Config.OLLAMA_MODEL,
                    'enabled': True
                }
            }), 200
        else:
            return jsonify({
                'status': 'error',
                'message': error_message or 'Failed to list models',
                'ollama_config': {
                    'base_url': Config.OLLAMA_BASE_URL,
                    'model': Config.OLLAMA_MODEL,
                    'enabled': True
                }
            }), 500

    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': f'Error listing models: {str(e)}',
            'ollama_config': {
                'base_url': Config.OLLAMA_BASE_URL,
                'model': Config.OLLAMA_MODEL,
                'enabled': Config.OLLAMA_ENABLED
            }
        }), 500


@ollama_bp.route('/api/ollama/gpu-status', methods=['GET'])
def get_gpu_status():
    """Check if Ollama is using GPU acceleration"""
    import requests

    try:
        if not Config.OLLAMA_ENABLED:
            return jsonify({
                'status': 'disabled',
                'message': 'Ollama is not enabled',
                'gpu_available': False,
                'gpu_in_use': False
            }), 200

        # Get Ollama process info
        try:
            response = requests.get(
                f"{Config.OLLAMA_BASE_URL}/api/ps",
                timeout=5
            )

            if response.status_code == 200:
                ps_data = response.json()

                # Check if any models are loaded
                models_loaded = ps_data.get('models', [])

                gpu_info = {
                    'status': 'success',
                    'ollama_running': True,
                    'models_loaded': len(models_loaded),
                    'gpu_available': False,
                    'gpu_in_use': False,
                    'gpu_details': None
                }

                # Check for GPU usage in loaded models
                for model in models_loaded:
                    # Check various GPU indicators
                    gpu_layers = model.get('gpu_layers', 0)
                    details = model.get('details', {})

                    # Check if GPU is mentioned in any field
                    if (gpu_layers > 0 or
                            'gpu' in str(model).lower() or
                            'cuda' in str(model).lower() or
                            details.get('families', []) and 'gpu' in str(details.get('families', [])).lower()):
                        gpu_info['gpu_in_use'] = True
                        gpu_info['gpu_available'] = True
                        gpu_info['gpu_details'] = {
                            'model': model.get('name', 'unknown'),
                            'gpu_layers': gpu_layers,
                            'size': model.get('size', 0),
                            'size_vram': model.get('size_vram', 0)
                        }
                        break

                # If no models loaded, check Docker container for GPU
                if not gpu_info['gpu_in_use']:
                    try:
                        import subprocess
                        # Check if nvidia-smi works in the ollama container
                        result = subprocess.run(
                            ['docker', 'exec', 'munich-news-ollama', 'nvidia-smi', '--query-gpu=name', '--format=csv,noheader'],
                            capture_output=True,
                            text=True,
                            timeout=5
                        )
                        if result.returncode == 0 and result.stdout.strip():
                            gpu_info['gpu_available'] = True
                            gpu_info['gpu_details'] = {
                                'gpu_name': result.stdout.strip(),
                                'note': 'GPU available but no model currently loaded'
                            }
                    except Exception:
                        pass

                # Try to get system info
                try:
                    tags_response = requests.get(
                        f"{Config.OLLAMA_BASE_URL}/api/tags",
                        timeout=5
                    )
                    if tags_response.status_code == 200:
                        tags_data = tags_response.json()
                        gpu_info['available_models'] = [m.get('name') for m in tags_data.get('models', [])]
                except Exception:
                    pass

                # Add recommendation
                if not gpu_info['gpu_in_use']:
                    gpu_info['recommendation'] = (
                        "GPU not detected. To enable GPU acceleration:\n"
                        "1. Ensure NVIDIA GPU is available\n"
                        "2. Install nvidia-docker2\n"
                        "3. Use: docker-compose -f docker-compose.yml -f docker-compose.gpu.yml up -d\n"
                        "4. See docs/GPU_SETUP.md for details"
                    )
                else:
                    gpu_info['recommendation'] = "✓ GPU acceleration is active!"

                return jsonify(gpu_info), 200
            else:
                return jsonify({
                    'status': 'error',
                    'message': f'Ollama API returned status {response.status_code}',
                    'ollama_running': False,
                    'gpu_available': False,
                    'gpu_in_use': False
                }), 500

        except requests.exceptions.ConnectionError:
            return jsonify({
                'status': 'error',
                'message': f'Cannot connect to Ollama at {Config.OLLAMA_BASE_URL}',
                'ollama_running': False,
                'gpu_available': False,
                'gpu_in_use': False,
                'troubleshooting': {
                    'check_container': 'docker-compose ps ollama',
                    'check_logs': 'docker-compose logs ollama',
                    'restart': 'docker-compose restart ollama'
                }
            }), 500

    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': f'Error checking GPU status: {str(e)}',
            'gpu_available': False,
            'gpu_in_use': False
        }), 500


@ollama_bp.route('/api/ollama/test', methods=['GET'])
def test_ollama_performance():
    """Test Ollama performance and measure response time"""
    import time

    try:
        if not Config.OLLAMA_ENABLED:
            return jsonify({
                'status': 'disabled',
                'message': 'Ollama is not enabled'
            }), 200

        # Test prompt
        test_prompt = "Summarize this in 20 words: Munich is the capital of Bavaria, Germany. It is known for Oktoberfest, BMW, and beautiful architecture."

        start_time = time.time()
        response_text, error_message = call_ollama(test_prompt, "You are a helpful assistant.")
        duration = time.time() - start_time

        if response_text:
            # Estimate performance
            if duration < 5:
                performance = "Excellent (GPU likely active)"
            elif duration < 15:
                performance = "Good (GPU may be active)"
            elif duration < 30:
                performance = "Fair (CPU mode)"
            else:
                performance = "Slow (CPU mode, consider GPU)"

            return jsonify({
                'status': 'success',
                'response': response_text,
                'duration_seconds': round(duration, 2),
                'performance': performance,
                'model': Config.OLLAMA_MODEL,
                'recommendation': (
                    "GPU acceleration recommended" if duration > 15
                    else "Performance is good"
                )
            }), 200
        else:
            return jsonify({
                'status': 'error',
                'message': error_message or 'Failed to get response',
                'duration_seconds': round(duration, 2)
            }), 500

    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': f'Error testing Ollama: {str(e)}'
        }), 500
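

# Minimal local smoke test: running this module directly mounts the blueprint
# on a throwaway Flask app and hits /api/ollama/ping with the test client.
# This is a sketch, not the project's production entry point; it assumes only
# what this file already imports (Flask, Config, the ollama service helpers)
# plus that the module can be executed from the backend directory so that
# `config` and `services.ollama_service` resolve.
if __name__ == '__main__':
    from flask import Flask

    _app = Flask(__name__)
    _app.register_blueprint(ollama_bp)

    with _app.test_client() as client:
        # GET works because the ping route accepts both GET and POST
        resp = client.get('/api/ollama/ping')
        print(resp.status_code, resp.get_json())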