update
@@ -156,3 +156,163 @@ def get_ollama_models():
            'enabled': Config.OLLAMA_ENABLED
        }
    }), 500


@ollama_bp.route('/api/ollama/gpu-status', methods=['GET'])
def get_gpu_status():
    """Check if Ollama is using GPU acceleration"""
    import requests

    try:
        if not Config.OLLAMA_ENABLED:
            return jsonify({
                'status': 'disabled',
                'message': 'Ollama is not enabled',
                'gpu_available': False,
                'gpu_in_use': False
            }), 200

        # Get Ollama process info
        try:
            response = requests.get(
                f"{Config.OLLAMA_BASE_URL}/api/ps",
                timeout=5
            )

            if response.status_code == 200:
                ps_data = response.json()

                # Check if any models are loaded
                models_loaded = ps_data.get('models', [])

                gpu_info = {
                    'status': 'success',
                    'ollama_running': True,
                    'models_loaded': len(models_loaded),
                    'gpu_available': False,
                    'gpu_in_use': False,
                    'gpu_details': None
                }

                # Check for GPU usage in loaded models. Ollama's /api/ps
                # reports per-model VRAM usage as 'size_vram'; the other two
                # checks are kept as a fallback for older response formats.
                for model in models_loaded:
                    if (model.get('size_vram', 0) > 0
                            or model.get('gpu_layers', 0) > 0
                            or 'gpu' in str(model).lower()):
                        gpu_info['gpu_in_use'] = True
                        gpu_info['gpu_available'] = True
                        gpu_info['gpu_details'] = {
                            'model': model.get('name', 'unknown'),
                            'gpu_layers': model.get('gpu_layers', 0),
                            'size_vram': model.get('size_vram', 0),
                            'size': model.get('size', 0)
                        }
                        break

                # Also list the locally available models (best effort)
                try:
                    tags_response = requests.get(
                        f"{Config.OLLAMA_BASE_URL}/api/tags",
                        timeout=5
                    )
                    if tags_response.status_code == 200:
                        tags_data = tags_response.json()
                        gpu_info['available_models'] = [
                            m.get('name') for m in tags_data.get('models', [])
                        ]
                except requests.exceptions.RequestException:
                    pass

                # Add recommendation
                if not gpu_info['gpu_in_use']:
                    gpu_info['recommendation'] = (
                        "GPU not detected. To enable GPU acceleration:\n"
                        "1. Ensure an NVIDIA GPU is available\n"
                        "2. Install nvidia-docker2\n"
                        "3. Use: docker-compose -f docker-compose.yml -f docker-compose.gpu.yml up -d\n"
                        "4. See docs/GPU_SETUP.md for details"
                    )
                else:
                    gpu_info['recommendation'] = "✓ GPU acceleration is active!"

                return jsonify(gpu_info), 200
            else:
                return jsonify({
                    'status': 'error',
                    'message': f'Ollama API returned status {response.status_code}',
                    'ollama_running': False,
                    'gpu_available': False,
                    'gpu_in_use': False
                }), 500

        except requests.exceptions.ConnectionError:
            return jsonify({
                'status': 'error',
                'message': f'Cannot connect to Ollama at {Config.OLLAMA_BASE_URL}',
                'ollama_running': False,
                'gpu_available': False,
                'gpu_in_use': False,
                'troubleshooting': {
                    'check_container': 'docker-compose ps ollama',
                    'check_logs': 'docker-compose logs ollama',
                    'restart': 'docker-compose restart ollama'
                }
            }), 500

    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': f'Error checking GPU status: {str(e)}',
            'gpu_available': False,
            'gpu_in_use': False
        }), 500

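For a quick smoke test of the gpu-status endpoint above, something like the following works once the Flask app is running. The base URL is an assumption (adjust host and port to your deployment); the field names match the JSON built by the route.

# Hypothetical client-side check for /api/ollama/gpu-status; assumes the
# Flask app listens on http://localhost:5000 (adjust as needed).
import requests

resp = requests.get("http://localhost:5000/api/ollama/gpu-status", timeout=10)
resp.raise_for_status()
info = resp.json()
print(f"status={info['status']}  gpu_in_use={info['gpu_in_use']}")
if not info['gpu_in_use']:
    # The endpoint includes setup hints when no GPU is detected
    print(info.get('recommendation', ''))
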
@ollama_bp.route('/api/ollama/test', methods=['GET'])
def test_ollama_performance():
    """Test Ollama performance and measure response time"""
    import time

    try:
        if not Config.OLLAMA_ENABLED:
            return jsonify({
                'status': 'disabled',
                'message': 'Ollama is not enabled'
            }), 200

        # Test prompt
        test_prompt = (
            "Summarize this in 20 words: Munich is the capital of Bavaria, "
            "Germany. It is known for Oktoberfest, BMW, and beautiful architecture."
        )

        start_time = time.time()
        response_text, error_message = call_ollama(test_prompt, "You are a helpful assistant.")
        duration = time.time() - start_time

        if response_text:
            # Rough heuristic: map response time to a likely GPU/CPU verdict
            if duration < 5:
                performance = "Excellent (GPU likely active)"
            elif duration < 15:
                performance = "Good (GPU may be active)"
            elif duration < 30:
                performance = "Fair (CPU mode)"
            else:
                performance = "Slow (CPU mode, consider GPU)"

            return jsonify({
                'status': 'success',
                'response': response_text,
                'duration_seconds': round(duration, 2),
                'performance': performance,
                'model': Config.OLLAMA_MODEL,
                'recommendation': (
                    "GPU acceleration recommended" if duration > 15
                    else "Performance is good"
                )
            }), 200
        else:
            return jsonify({
                'status': 'error',
                'message': error_message or 'Failed to get response',
                'duration_seconds': round(duration, 2)
            }), 500

    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': f'Error testing Ollama: {str(e)}'
        }), 500
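
The timing endpoint above can be exercised the same way. Since CPU-only generation can take tens of seconds, the client timeout should be generous; host and port are again assumptions.

# Hypothetical client-side call to /api/ollama/test; a long timeout covers
# slow CPU-only generation (the endpoint itself classifies the duration).
import requests

resp = requests.get("http://localhost:5000/api/ollama/test", timeout=120)
data = resp.json()
if data['status'] == 'success':
    print(f"{data['duration_seconds']}s -> {data['performance']}")
else:
    print("Test failed:", data.get('message', 'unknown error'))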