diff --git a/backend/routes/ollama_routes.py b/backend/routes/ollama_routes.py
index 50bfb18..822d79d 100644
--- a/backend/routes/ollama_routes.py
+++ b/backend/routes/ollama_routes.py
@@ -221,19 +221,41 @@ def get_gpu_status():
         import subprocess
         # Check if nvidia-smi works in ollama container
         result = subprocess.run(
-            ['docker', 'exec', 'munich-news-ollama', 'nvidia-smi', '--query-gpu=name', '--format=csv,noheader'],
+            ['docker', 'exec', 'munich-news-ollama', 'nvidia-smi', '--query-gpu=name,memory.used,utilization.gpu', '--format=csv,noheader'],
             capture_output=True,
             text=True,
             timeout=5
         )
         if result.returncode == 0 and result.stdout.strip():
+            gpu_data = result.stdout.strip().split(',')
             gpu_info['gpu_available'] = True
             gpu_info['gpu_details'] = {
-                'gpu_name': result.stdout.strip(),
+                'gpu_name': gpu_data[0].strip() if len(gpu_data) > 0 else 'Unknown',
+                'memory_used': gpu_data[1].strip() if len(gpu_data) > 1 else 'N/A',
+                'utilization': gpu_data[2].strip() if len(gpu_data) > 2 else 'N/A',
                 'note': 'GPU available but no model currently loaded'
             }
-    except:
-        pass
+
+        # Check Ollama logs for GPU usage evidence
+        log_result = subprocess.run(
+            ['docker', 'logs', '--tail', '50', 'munich-news-ollama'],
+            capture_output=True,
+            text=True,
+            timeout=5
+        )
+        if log_result.returncode == 0:
+            logs = log_result.stdout + log_result.stderr
+            # Look for GPU offloading messages
+            if 'offloaded' in logs.lower() and 'gpu' in logs.lower():
+                gpu_info['gpu_in_use'] = True
+                gpu_info['gpu_details']['note'] = 'GPU acceleration active (detected in logs)'
+                # Extract layer info if available
+                import re
+                match = re.search(r'offloaded (\d+)/(\d+) layers', logs, re.IGNORECASE)
+                if match:
+                    gpu_info['gpu_details']['layers_offloaded'] = f"{match.group(1)}/{match.group(2)}"
+    except Exception as e:
+        gpu_info['debug_error'] = str(e)
 
     # Try to get system info
     try:
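
For reference, a minimal standalone sketch of the parsing logic this patch introduces, runnable without Docker or a GPU. The sample nvidia-smi and log strings below are illustrative assumptions, not output captured from the munich-news-ollama container:

    import re

    # Illustrative stand-ins for `nvidia-smi --format=csv,noheader` output
    # and a llama.cpp-style offload line from `docker logs`.
    sample_smi = "NVIDIA GeForce RTX 3090, 1234 MiB, 7 %"
    sample_logs = "llm_load_tensors: offloaded 33/33 layers to GPU"

    # Same CSV split / fallback handling as the patched route
    gpu_data = [part.strip() for part in sample_smi.split(',')]
    gpu_details = {
        'gpu_name': gpu_data[0] if len(gpu_data) > 0 else 'Unknown',
        'memory_used': gpu_data[1] if len(gpu_data) > 1 else 'N/A',
        'utilization': gpu_data[2] if len(gpu_data) > 2 else 'N/A',
    }

    # Same regex used to pull the offloaded-layer count out of the logs
    match = re.search(r'offloaded (\d+)/(\d+) layers', sample_logs, re.IGNORECASE)
    if match:
        gpu_details['layers_offloaded'] = f"{match.group(1)}/{match.group(2)}"

    print(gpu_details)
    # {'gpu_name': 'NVIDIA GeForce RTX 3090', 'memory_used': '1234 MiB',
    #  'utilization': '7 %', 'layers_offloaded': '33/33'}

Note that the single-line split assumes one GPU; with multiple GPUs, nvidia-smi emits one CSV row per device.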