From d09d96ca4b854997e0319d12dd11746239a845d0 Mon Sep 17 00:00:00 2001
From: Dongho Kim
Date: Wed, 12 Nov 2025 15:21:58 +0100
Subject: [PATCH] gpu config

---
 backend/routes/ollama_routes.py | 35 ++++++++++++++++++++++++++++++---
 diagnose-gpu.sh                 | 31 +++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+), 3 deletions(-)
 create mode 100644 diagnose-gpu.sh

diff --git a/backend/routes/ollama_routes.py b/backend/routes/ollama_routes.py
index 442f893..50bfb18 100644
--- a/backend/routes/ollama_routes.py
+++ b/backend/routes/ollama_routes.py
@@ -196,16 +196,45 @@ def get_gpu_status():
 
         # Check for GPU usage in loaded models
         for model in models_loaded:
-            if 'gpu' in str(model).lower() or model.get('gpu_layers', 0) > 0:
+            # Check various GPU indicators
+            gpu_layers = model.get('gpu_layers', 0)
+            details = model.get('details', {})
+
+            # Check if GPU is mentioned in any field
+            if (gpu_layers > 0 or
+                    'gpu' in str(model).lower() or
+                    'cuda' in str(model).lower() or
+                    details.get('families', []) and 'gpu' in str(details.get('families', [])).lower()):
                 gpu_info['gpu_in_use'] = True
                 gpu_info['gpu_available'] = True
                 gpu_info['gpu_details'] = {
                     'model': model.get('name', 'unknown'),
-                    'gpu_layers': model.get('gpu_layers', 0),
-                    'size': model.get('size', 0)
+                    'gpu_layers': gpu_layers,
+                    'size': model.get('size', 0),
+                    'size_vram': model.get('size_vram', 0)
                 }
                 break
 
+        # If no models loaded, check Docker container for GPU
+        if not gpu_info['gpu_in_use']:
+            try:
+                import subprocess
+                # Check if nvidia-smi works in ollama container
+                result = subprocess.run(
+                    ['docker', 'exec', 'munich-news-ollama', 'nvidia-smi', '--query-gpu=name', '--format=csv,noheader'],
+                    capture_output=True,
+                    text=True,
+                    timeout=5
+                )
+                if result.returncode == 0 and result.stdout.strip():
+                    gpu_info['gpu_available'] = True
+                    gpu_info['gpu_details'] = {
+                        'gpu_name': result.stdout.strip(),
+                        'note': 'GPU available but no model currently loaded'
+                    }
+            except:
+                pass
+
         # Try to get system info
         try:
             tags_response = requests.get(
diff --git a/diagnose-gpu.sh b/diagnose-gpu.sh
new file mode 100644
index 0000000..1229ef7
--- /dev/null
+++ b/diagnose-gpu.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# GPU Diagnostic Script for Munich News Ollama
+
+echo "=========================================="
+echo "GPU Diagnostic for Munich News Ollama"
+echo "=========================================="
+echo ""
+
+echo "1. Checking if NVIDIA runtime is available..."
+docker run --rm --gpus all nvidia/cuda:11.0-base nvidia-smi 2>&1 | head -20
+echo ""
+
+echo "2. Checking Ollama container GPU configuration..."
+docker inspect munich-news-ollama | grep -A 10 "DeviceRequests"
+echo ""
+
+echo "3. Checking if GPU is accessible inside Ollama container..."
+docker exec munich-news-ollama nvidia-smi 2>&1 | head -20
+echo ""
+
+echo "4. Checking Ollama logs for GPU messages..."
+docker logs munich-news-ollama 2>&1 | grep -i "gpu\|cuda\|nvidia" | tail -10
+echo ""
+
+echo "5. Testing Ollama with a simple prompt..."
+docker exec munich-news-ollama ollama run phi3:latest "Hello" 2>&1 | head -10
+echo ""
+
+echo "=========================================="
+echo "Diagnostic complete!"
+echo "=========================================="