commit d59372d1d6
parent 6773775f2a
Date: 2025-11-12 11:55:53 +01:00

8 changed files with 694 additions and 20 deletions


@@ -156,3 +156,163 @@ def get_ollama_models():
                'enabled': Config.OLLAMA_ENABLED
            }
        }), 500


@ollama_bp.route('/api/ollama/gpu-status', methods=['GET'])
def get_gpu_status():
    """Check if Ollama is using GPU acceleration"""
    import requests

    try:
        if not Config.OLLAMA_ENABLED:
            return jsonify({
                'status': 'disabled',
                'message': 'Ollama is not enabled',
                'gpu_available': False,
                'gpu_in_use': False
            }), 200

        # Get Ollama process info
        try:
            response = requests.get(
                f"{Config.OLLAMA_BASE_URL}/api/ps",
                timeout=5
            )

            if response.status_code == 200:
                ps_data = response.json()

                # Check if any models are loaded
                models_loaded = ps_data.get('models', [])

                gpu_info = {
                    'status': 'success',
                    'ollama_running': True,
                    'models_loaded': len(models_loaded),
                    'gpu_available': False,
                    'gpu_in_use': False,
                    'gpu_details': None
                }

                # Check for GPU usage in loaded models
                for model in models_loaded:
                    if 'gpu' in str(model).lower() or model.get('gpu_layers', 0) > 0:
                        gpu_info['gpu_in_use'] = True
                        gpu_info['gpu_available'] = True
                        gpu_info['gpu_details'] = {
                            'model': model.get('name', 'unknown'),
                            'gpu_layers': model.get('gpu_layers', 0),
                            'size': model.get('size', 0)
                        }
                        break

                # Try to get the list of installed models (optional extra info)
                try:
                    tags_response = requests.get(
                        f"{Config.OLLAMA_BASE_URL}/api/tags",
                        timeout=5
                    )
                    if tags_response.status_code == 200:
                        tags_data = tags_response.json()
                        gpu_info['available_models'] = [m.get('name') for m in tags_data.get('models', [])]
                except requests.exceptions.RequestException:
                    # The model list is optional; ignore lookup failures
                    pass

                # Add recommendation
                if not gpu_info['gpu_in_use']:
                    gpu_info['recommendation'] = (
                        "GPU not detected. To enable GPU acceleration:\n"
                        "1. Ensure NVIDIA GPU is available\n"
                        "2. Install nvidia-docker2\n"
                        "3. Use: docker-compose -f docker-compose.yml -f docker-compose.gpu.yml up -d\n"
                        "4. See docs/GPU_SETUP.md for details"
                    )
                else:
                    gpu_info['recommendation'] = "✓ GPU acceleration is active!"

                return jsonify(gpu_info), 200
            else:
                return jsonify({
                    'status': 'error',
                    'message': f'Ollama API returned status {response.status_code}',
                    'ollama_running': False,
                    'gpu_available': False,
                    'gpu_in_use': False
                }), 500

        except requests.exceptions.ConnectionError:
            return jsonify({
                'status': 'error',
                'message': f'Cannot connect to Ollama at {Config.OLLAMA_BASE_URL}',
                'ollama_running': False,
                'gpu_available': False,
                'gpu_in_use': False,
                'troubleshooting': {
                    'check_container': 'docker-compose ps ollama',
                    'check_logs': 'docker-compose logs ollama',
                    'restart': 'docker-compose restart ollama'
                }
            }), 500

    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': f'Error checking GPU status: {str(e)}',
            'gpu_available': False,
            'gpu_in_use': False
        }), 500


@ollama_bp.route('/api/ollama/test', methods=['GET'])
def test_ollama_performance():
    """Test Ollama performance and measure response time"""
    import time

    try:
        if not Config.OLLAMA_ENABLED:
            return jsonify({
                'status': 'disabled',
                'message': 'Ollama is not enabled'
            }), 200

        # Test prompt
        test_prompt = (
            "Summarize this in 20 words: Munich is the capital of Bavaria, "
            "Germany. It is known for Oktoberfest, BMW, and beautiful architecture."
        )

        start_time = time.time()
        response_text, error_message = call_ollama(test_prompt, "You are a helpful assistant.")
        duration = time.time() - start_time

        if response_text:
            # Estimate performance from wall-clock latency
            if duration < 5:
                performance = "Excellent (GPU likely active)"
            elif duration < 15:
                performance = "Good (GPU may be active)"
            elif duration < 30:
                performance = "Fair (CPU mode)"
            else:
                performance = "Slow (CPU mode, consider GPU)"

            return jsonify({
                'status': 'success',
                'response': response_text,
                'duration_seconds': round(duration, 2),
                'performance': performance,
                'model': Config.OLLAMA_MODEL,
                'recommendation': (
                    "GPU acceleration recommended" if duration > 15
                    else "Performance is good"
                )
            }), 200
        else:
            return jsonify({
                'status': 'error',
                'message': error_message or 'Failed to get response',
                'duration_seconds': round(duration, 2)
            }), 500

    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': f'Error testing Ollama: {str(e)}'
        }), 500
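
For reference, here is a minimal sketch of how the two new endpoints could be exercised from a client once the app is running. The base URL and port are assumptions (adjust to your deployment); everything else uses only fields the handlers above actually return.

    # smoke_test.py - quick check of the new GPU diagnostics endpoints
    import requests

    BASE = "http://localhost:5000"  # hypothetical host/port, not part of this commit

    # 1. GPU status: reports whether any loaded model appears to use the GPU
    gpu = requests.get(f"{BASE}/api/ollama/gpu-status", timeout=10).json()
    print(gpu['status'], '-', gpu.get('recommendation'))

    # 2. Performance probe: runs a short prompt and buckets the latency
    # (generous timeout, since CPU-only inference can take 30+ seconds)
    perf = requests.get(f"{BASE}/api/ollama/test", timeout=120).json()
    if perf['status'] == 'success':
        print(f"{perf['duration_seconds']}s -> {perf['performance']}")
    else:
        print('test failed:', perf.get('message'))

Both endpoints return JSON with a status field, so the same pattern works from a monitoring script or a simple health check.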