gpu config
@@ -196,16 +196,45 @@ def get_gpu_status():
     # Check for GPU usage in loaded models
     for model in models_loaded:
-        if 'gpu' in str(model).lower() or model.get('gpu_layers', 0) > 0:
+        # Check various GPU indicators
+        gpu_layers = model.get('gpu_layers', 0)
+        details = model.get('details', {})
+
+        # Check if GPU is mentioned in any field
+        if (gpu_layers > 0 or
+                'gpu' in str(model).lower() or
+                'cuda' in str(model).lower() or
+                details.get('families', []) and 'gpu' in str(details.get('families', [])).lower()):
             gpu_info['gpu_in_use'] = True
             gpu_info['gpu_available'] = True
             gpu_info['gpu_details'] = {
                 'model': model.get('name', 'unknown'),
-                'gpu_layers': model.get('gpu_layers', 0),
-                'size': model.get('size', 0)
+                'gpu_layers': gpu_layers,
+                'size': model.get('size', 0),
+                'size_vram': model.get('size_vram', 0)
             }
             break
+
+    # If no models loaded, check Docker container for GPU
+    if not gpu_info['gpu_in_use']:
+        try:
+            import subprocess
+            # Check if nvidia-smi works in ollama container
+            result = subprocess.run(
+                ['docker', 'exec', 'munich-news-ollama', 'nvidia-smi', '--query-gpu=name', '--format=csv,noheader'],
+                capture_output=True,
+                text=True,
+                timeout=5
+            )
+            if result.returncode == 0 and result.stdout.strip():
+                gpu_info['gpu_available'] = True
+                gpu_info['gpu_details'] = {
+                    'gpu_name': result.stdout.strip(),
+                    'note': 'GPU available but no model currently loaded'
+                }
+        except Exception:
+            pass
+
     # Try to get system info
     try:
         tags_response = requests.get(
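For reference, the size_vram field checked above is what Ollama reports for currently loaded models (its /api/ps endpoint). A quick way to cross-check what the code sees, sketched here assuming Ollama listens on its default port 11434 and the container name used in the diff:

curl -s http://localhost:11434/api/ps    # loaded models; a non-zero "size_vram" indicates the model is resident on the GPU
docker exec munich-news-ollama ollama ps # same view from inside the container

ollama ps also prints a PROCESSOR column (e.g. "100% GPU"), which is the quickest confirmation that offload is actually happening.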
diagnose-gpu.sh (new file, 31 lines)
@@ -0,0 +1,31 @@
+#!/bin/bash
+# GPU Diagnostic Script for Munich News Ollama
+
+echo "=========================================="
+echo "GPU Diagnostic for Munich News Ollama"
+echo "=========================================="
+echo ""
+
+echo "1. Checking if NVIDIA runtime is available..."
+docker run --rm --gpus all nvidia/cuda:11.0-base nvidia-smi 2>&1 | head -20
+echo ""
+
+echo "2. Checking Ollama container GPU configuration..."
+docker inspect munich-news-ollama | grep -A 10 "DeviceRequests"
+echo ""
+
+echo "3. Checking if GPU is accessible inside Ollama container..."
+docker exec munich-news-ollama nvidia-smi 2>&1 | head -20
+echo ""
+
+echo "4. Checking Ollama logs for GPU messages..."
+docker logs munich-news-ollama 2>&1 | grep -i "gpu\|cuda\|nvidia" | tail -10
+echo ""
+
+echo "5. Testing Ollama with a simple prompt..."
+docker exec munich-news-ollama ollama run phi3:latest "Hello" 2>&1 | head -10
+echo ""
+
+echo "=========================================="
+echo "Diagnostic complete!"
+echo "=========================================="
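To use the script, make it executable and run it on the host, not inside a container:

chmod +x diagnose-gpu.sh
./diagnose-gpu.sh

One caveat: step 1 pulls nvidia/cuda:11.0-base, an old tag that may no longer be pullable from some registries; substituting any current nvidia/cuda base tag performs the same runtime check, since nvidia-smi is provided by the host driver through the NVIDIA container runtime.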