update
@@ -156,3 +156,163 @@ def get_ollama_models():
            'enabled': Config.OLLAMA_ENABLED
        }
    }), 500


@ollama_bp.route('/api/ollama/gpu-status', methods=['GET'])
def get_gpu_status():
    """Check if Ollama is using GPU acceleration"""
    import requests

    try:
        if not Config.OLLAMA_ENABLED:
            return jsonify({
                'status': 'disabled',
                'message': 'Ollama is not enabled',
                'gpu_available': False,
                'gpu_in_use': False
            }), 200

        # Get Ollama process info
        try:
            response = requests.get(
                f"{Config.OLLAMA_BASE_URL}/api/ps",
                timeout=5
            )

            if response.status_code == 200:
                ps_data = response.json()

                # Check if any models are loaded
                models_loaded = ps_data.get('models', [])

                gpu_info = {
                    'status': 'success',
                    'ollama_running': True,
                    'models_loaded': len(models_loaded),
                    'gpu_available': False,
                    'gpu_in_use': False,
                    'gpu_details': None
                }

                # Check for GPU usage in loaded models. Ollama's /api/ps
                # reports per-model VRAM usage as 'size_vram'; the other two
                # checks are kept as a fallback for older response formats.
                for model in models_loaded:
                    if (model.get('size_vram', 0) > 0
                            or model.get('gpu_layers', 0) > 0
                            or 'gpu' in str(model).lower()):
                        gpu_info['gpu_in_use'] = True
                        gpu_info['gpu_available'] = True
                        gpu_info['gpu_details'] = {
                            'model': model.get('name', 'unknown'),
                            'gpu_layers': model.get('gpu_layers', 0),
                            'size_vram': model.get('size_vram', 0),
                            'size': model.get('size', 0)
                        }
                        break

                # Also list the locally available models (best effort)
                try:
                    tags_response = requests.get(
                        f"{Config.OLLAMA_BASE_URL}/api/tags",
                        timeout=5
                    )
                    if tags_response.status_code == 200:
                        tags_data = tags_response.json()
                        gpu_info['available_models'] = [
                            m.get('name') for m in tags_data.get('models', [])
                        ]
                except requests.exceptions.RequestException:
                    pass

                # Add recommendation
                if not gpu_info['gpu_in_use']:
                    gpu_info['recommendation'] = (
                        "GPU not detected. To enable GPU acceleration:\n"
                        "1. Ensure an NVIDIA GPU is available\n"
                        "2. Install nvidia-docker2\n"
                        "3. Use: docker-compose -f docker-compose.yml -f docker-compose.gpu.yml up -d\n"
                        "4. See docs/GPU_SETUP.md for details"
                    )
                else:
                    gpu_info['recommendation'] = "✓ GPU acceleration is active!"

                return jsonify(gpu_info), 200
            else:
                return jsonify({
                    'status': 'error',
                    'message': f'Ollama API returned status {response.status_code}',
                    'ollama_running': False,
                    'gpu_available': False,
                    'gpu_in_use': False
                }), 500

        except requests.exceptions.ConnectionError:
            return jsonify({
                'status': 'error',
                'message': f'Cannot connect to Ollama at {Config.OLLAMA_BASE_URL}',
                'ollama_running': False,
                'gpu_available': False,
                'gpu_in_use': False,
                'troubleshooting': {
                    'check_container': 'docker-compose ps ollama',
                    'check_logs': 'docker-compose logs ollama',
                    'restart': 'docker-compose restart ollama'
                }
            }), 500

    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': f'Error checking GPU status: {str(e)}',
            'gpu_available': False,
            'gpu_in_use': False
        }), 500

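For a quick smoke test of the gpu-status endpoint above, something like the following works once the Flask app is running. The base URL is an assumption (adjust host and port to your deployment); the field names match the JSON built by the route.

# Hypothetical client-side check for /api/ollama/gpu-status; assumes the
# Flask app listens on http://localhost:5000 (adjust as needed).
import requests

resp = requests.get("http://localhost:5000/api/ollama/gpu-status", timeout=10)
resp.raise_for_status()
info = resp.json()
print(f"status={info['status']}  gpu_in_use={info['gpu_in_use']}")
if not info['gpu_in_use']:
    # The endpoint includes setup hints when no GPU is detected
    print(info.get('recommendation', ''))
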
@ollama_bp.route('/api/ollama/test', methods=['GET'])
def test_ollama_performance():
    """Test Ollama performance and measure response time"""
    import time

    try:
        if not Config.OLLAMA_ENABLED:
            return jsonify({
                'status': 'disabled',
                'message': 'Ollama is not enabled'
            }), 200

        # Test prompt
        test_prompt = (
            "Summarize this in 20 words: Munich is the capital of Bavaria, "
            "Germany. It is known for Oktoberfest, BMW, and beautiful architecture."
        )

        start_time = time.time()
        response_text, error_message = call_ollama(test_prompt, "You are a helpful assistant.")
        duration = time.time() - start_time

        if response_text:
            # Rough heuristic: map response time to a likely GPU/CPU verdict
            if duration < 5:
                performance = "Excellent (GPU likely active)"
            elif duration < 15:
                performance = "Good (GPU may be active)"
            elif duration < 30:
                performance = "Fair (CPU mode)"
            else:
                performance = "Slow (CPU mode, consider GPU)"

            return jsonify({
                'status': 'success',
                'response': response_text,
                'duration_seconds': round(duration, 2),
                'performance': performance,
                'model': Config.OLLAMA_MODEL,
                'recommendation': (
                    "GPU acceleration recommended" if duration > 15
                    else "Performance is good"
                )
            }), 200
        else:
            return jsonify({
                'status': 'error',
                'message': error_message or 'Failed to get response',
                'duration_seconds': round(duration, 2)
            }), 500

    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': f'Error testing Ollama: {str(e)}'
        }), 500
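
The timing endpoint above can be exercised the same way. Since CPU-only generation can take tens of seconds, the client timeout should be generous; host and port are again assumptions.

# Hypothetical client-side call to /api/ollama/test; a long timeout covers
# slow CPU-only generation (the endpoint itself classifies the duration).
import requests

resp = requests.get("http://localhost:5000/api/ollama/test", timeout=120)
data = resp.json()
if data['status'] == 'success':
    print(f"{data['duration_seconds']}s -> {data['performance']}")
else:
    print("Test failed:", data.get('message', 'unknown error'))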