update

2025-11-12 11:55:53 +01:00
parent 6773775f2a
commit d59372d1d6
8 changed files with 694 additions and 20 deletions
@@ -124,6 +124,8 @@ That's it! The system will automatically:

 📖 **For detailed Ollama setup & GPU acceleration:** See [docs/OLLAMA_SETUP.md](docs/OLLAMA_SETUP.md)

+💡 **To change the AI model:** Edit `OLLAMA_MODEL` in `backend/.env`, then run `./pull-ollama-model.sh`. See [docs/CHANGING_AI_MODEL.md](docs/CHANGING_AI_MODEL.md)
+
 ## ⚙️ Configuration

 Edit `backend/.env`:
@@ -156,3 +156,163 @@ def get_ollama_models():
                'enabled': Config.OLLAMA_ENABLED
            }
        }), 500
+
+
+@ollama_bp.route('/api/ollama/gpu-status', methods=['GET'])
+def get_gpu_status():
+    """Check if Ollama is using GPU acceleration"""
+    import requests
+    
+    try:
+        if not Config.OLLAMA_ENABLED:
+            return jsonify({
+                'status': 'disabled',
+                'message': 'Ollama is not enabled',
+                'gpu_available': False,
+                'gpu_in_use': False
+            }), 200
+        
+        # Get Ollama process info
+        try:
+            response = requests.get(
+                f"{Config.OLLAMA_BASE_URL}/api/ps",
+                timeout=5
+            )
+            
+            if response.status_code == 200:
+                ps_data = response.json()
+                
+                # Check if any models are loaded
+                models_loaded = ps_data.get('models', [])
+                
+                gpu_info = {
+                    'status': 'success',
+                    'ollama_running': True,
+                    'models_loaded': len(models_loaded),
+                    'gpu_available': False,
+                    'gpu_in_use': False,
+                    'gpu_details': None
+                }
+                
+                # Check for GPU usage in loaded models
+                for model in models_loaded:
+                    if 'gpu' in str(model).lower() or model.get('gpu_layers', 0) > 0:
+                        gpu_info['gpu_in_use'] = True
+                        gpu_info['gpu_available'] = True
+                        gpu_info['gpu_details'] = {
+                            'model': model.get('name', 'unknown'),
+                            'gpu_layers': model.get('gpu_layers', 0),
+                            'size': model.get('size', 0)
+                        }
+                        break
+                
+                # Try to get system info
+                try:
+                    tags_response = requests.get(
+                        f"{Config.OLLAMA_BASE_URL}/api/tags",
+                        timeout=5
+                    )
+                    if tags_response.status_code == 200:
+                        tags_data = tags_response.json()
+                        gpu_info['available_models'] = [m.get('name') for m in tags_data.get('models', [])]
+                except:
+                    pass
+                
+                # Add recommendation
+                if not gpu_info['gpu_in_use']:
+                    gpu_info['recommendation'] = (
+                        "GPU not detected. To enable GPU acceleration:\n"
+                        "1. Ensure NVIDIA GPU is available\n"
+                        "2. Install nvidia-docker2\n"
+                        "3. Use: docker-compose -f docker-compose.yml -f docker-compose.gpu.yml up -d\n"
+                        "4. See docs/GPU_SETUP.md for details"
+                    )
+                else:
+                    gpu_info['recommendation'] = "✓ GPU acceleration is active!"
+                
+                return jsonify(gpu_info), 200
+            else:
+                return jsonify({
+                    'status': 'error',
+                    'message': f'Ollama API returned status {response.status_code}',
+                    'ollama_running': False,
+                    'gpu_available': False,
+                    'gpu_in_use': False
+                }), 500
+                
+        except requests.exceptions.ConnectionError:
+            return jsonify({
+                'status': 'error',
+                'message': f'Cannot connect to Ollama at {Config.OLLAMA_BASE_URL}',
+                'ollama_running': False,
+                'gpu_available': False,
+                'gpu_in_use': False,
+                'troubleshooting': {
+                    'check_container': 'docker-compose ps ollama',
+                    'check_logs': 'docker-compose logs ollama',
+                    'restart': 'docker-compose restart ollama'
+                }
+            }), 500
+            
+    except Exception as e:
+        return jsonify({
+            'status': 'error',
+            'message': f'Error checking GPU status: {str(e)}',
+            'gpu_available': False,
+            'gpu_in_use': False
+        }), 500
+
+
+@ollama_bp.route('/api/ollama/test', methods=['GET'])
+def test_ollama_performance():
+    """Test Ollama performance and measure response time"""
+    import time
+    
+    try:
+        if not Config.OLLAMA_ENABLED:
+            return jsonify({
+                'status': 'disabled',
+                'message': 'Ollama is not enabled'
+            }), 200
+        
+        # Test prompt
+        test_prompt = "Summarize this in 20 words: Munich is the capital of Bavaria, Germany. It is known for Oktoberfest, BMW, and beautiful architecture."
+        
+        start_time = time.time()
+        response_text, error_message = call_ollama(test_prompt, "You are a helpful assistant.")
+        duration = time.time() - start_time
+        
+        if response_text:
+            # Estimate performance
+            if duration < 5:
+                performance = "Excellent (GPU likely active)"
+            elif duration < 15:
+                performance = "Good (GPU may be active)"
+            elif duration < 30:
+                performance = "Fair (CPU mode)"
+            else:
+                performance = "Slow (CPU mode, consider GPU)"
+            
+            return jsonify({
+                'status': 'success',
+                'response': response_text,
+                'duration_seconds': round(duration, 2),
+                'performance': performance,
+                'model': Config.OLLAMA_MODEL,
+                'recommendation': (
+                    "GPU acceleration recommended" if duration > 15 
+                    else "Performance is good"
+                )
+            }), 200
+        else:
+            return jsonify({
+                'status': 'error',
+                'message': error_message or 'Failed to get response',
+                'duration_seconds': round(duration, 2)
+            }), 500
+            
+    except Exception as e:
+        return jsonify({
+            'status': 'error',
+            'message': f'Error testing Ollama: {str(e)}'
+        }), 500
@@ -0,0 +1,46 @@
+#!/bin/bash
+# Check GPU status via API
+
+echo "=========================================="
+echo "Ollama GPU Status Check"
+echo "=========================================="
+echo ""
+
+# Check GPU status
+echo "1. GPU Status:"
+echo "---"
+curl -s http://localhost:5001/api/ollama/gpu-status | python3 -m json.tool
+echo ""
+echo ""
+
+# Test performance
+echo "2. Performance Test:"
+echo "---"
+curl -s http://localhost:5001/api/ollama/test | python3 -m json.tool
+echo ""
+echo ""
+
+# List models
+echo "3. Available Models:"
+echo "---"
+curl -s http://localhost:5001/api/ollama/models | python3 -m json.tool
+echo ""
+echo ""
+
+echo "=========================================="
+echo "Quick Summary:"
+echo "=========================================="
+
+# Extract key info
+GPU_STATUS=$(curl -s http://localhost:5001/api/ollama/gpu-status | python3 -c "import json,sys; data=json.load(sys.stdin); print('GPU Active' if data.get('gpu_in_use') else 'CPU Mode')" 2>/dev/null || echo "Error")
+PERF=$(curl -s http://localhost:5001/api/ollama/test | python3 -c "import json,sys; data=json.load(sys.stdin); print(f\"{data.get('duration_seconds', 'N/A')}s - {data.get('performance', 'N/A')}\")" 2>/dev/null || echo "Error")
+
+echo "GPU Status: $GPU_STATUS"
+echo "Performance: $PERF"
+echo ""
+
+if [ "$GPU_STATUS" = "CPU Mode" ]; then
+    echo "💡 TIP: Enable GPU for 5-10x faster processing:"
+    echo "   docker-compose -f docker-compose.yml -f docker-compose.gpu.yml up -d"
+    echo "   See docs/GPU_SETUP.md for details"
+fi
@@ -52,17 +52,10 @@ services:
      - munich-news-network
    env_file:
      - backend/.env
-    entrypoint: /bin/sh
-    command: >
-      -c "
-      echo 'Waiting for Ollama service to be ready...' &&
-      sleep 5 &&
-      echo 'Pulling model: ${OLLAMA_MODEL:-phi3:latest}' &&
-      curl -X POST http://ollama:11434/api/pull -d '{\"name\":\"${OLLAMA_MODEL:-phi3:latest}\"}' &&
-      echo '' &&
-      echo 'Model ${OLLAMA_MODEL:-phi3:latest} pull initiated!'
-      "
-    restart: "no"
+    volumes:
+      - ./scripts/setup-ollama-model.sh:/setup-ollama-model.sh:ro
+    command: sh /setup-ollama-model.sh
+    restart: on-failure

  # MongoDB Database (Internal only - not exposed to host)
  mongodb:
@@ -15,6 +15,21 @@ OLLAMA_MODEL=phi3:latest

 ## ✅ How to Change the Model

+### Important Note
+
+✅ **The model IS automatically checked and downloaded on startup**
+
+The `ollama-setup` service runs on every `docker-compose up` and:
+- Checks if the model specified in `.env` exists
+- Downloads it if missing
+- Skips download if already present
+
+This means you can simply:
+1. Change `OLLAMA_MODEL` in `.env`
+2. Run `docker-compose up -d`
+3. Wait for download (if needed)
+4. Done!
+
 ### Step 1: Update .env File

 Edit `backend/.env` and change the `OLLAMA_MODEL` value:
@@ -30,22 +45,38 @@ OLLAMA_MODEL=mistral:7b
 OLLAMA_MODEL=your-custom-model:latest
 ```

-### Step 2: Restart Services
-
-The model will be automatically downloaded on startup:
+### Step 2: Restart Services (Model Auto-Downloads)

+**Option A: Simple restart (Recommended)**
 ```bash
-# Stop services
-docker-compose down
-
-# Start services (model will be pulled automatically)
+# Restart all services
 docker-compose up -d

-# Watch the download progress
+# Watch the model check/download
 docker-compose logs -f ollama-setup
 ```

-**Note:** First startup with a new model takes 2-10 minutes depending on model size.
+The `ollama-setup` service will:
+- Check if the new model exists
+- Download it if missing (2-10 minutes)
+- Skip download if already present
+
+**Option B: Manual pull (if you want control)**
+```bash
+# Pull the model manually first
+./pull-ollama-model.sh
+
+# Then restart
+docker-compose restart crawler backend
+```
+
+**Option C: Full restart**
+```bash
+docker-compose down
+docker-compose up -d
+```
+
+**Note:** Model download takes 2-10 minutes depending on model size.

 ## Supported Models

@@ -264,3 +295,68 @@ A: 5-10GB for small models, 50GB+ for large models. Plan accordingly.
 - [OLLAMA_SETUP.md](OLLAMA_SETUP.md) - Ollama installation & configuration
 - [GPU_SETUP.md](GPU_SETUP.md) - GPU acceleration setup
 - [AI_NEWS_AGGREGATION.md](AI_NEWS_AGGREGATION.md) - AI features overview
+
+
+## Complete Example: Changing from phi3 to llama3
+
+```bash
+# 1. Check current model
+curl -s http://localhost:5001/api/ollama/models | python3 -m json.tool
+# Shows: "current_model": "phi3:latest"
+
+# 2. Update .env file
+# Edit backend/.env and change:
+# OLLAMA_MODEL=llama3:8b
+
+# 3. Pull the new model
+./pull-ollama-model.sh
+# Or manually: docker-compose exec ollama ollama pull llama3:8b
+
+# 4. Restart services
+docker-compose restart crawler backend
+
+# 5. Verify the change
+curl -s http://localhost:5001/api/ollama/models | python3 -m json.tool
+# Shows: "current_model": "llama3:8b"
+
+# 6. Test performance
+curl -s http://localhost:5001/api/ollama/test | python3 -m json.tool
+# Response should report "model": "llama3:8b" together with the measured duration
+```
+
+## Quick Reference
+
+### Change Model Workflow
+
+```bash
+# 1. Edit .env
+vim backend/.env  # Change OLLAMA_MODEL
+
+# 2. Pull model
+./pull-ollama-model.sh
+
+# 3. Restart
+docker-compose restart crawler backend
+
+# 4. Verify
+curl http://localhost:5001/api/ollama/test
+```
+
+### Common Commands
+
+```bash
+# List downloaded models
+docker-compose exec ollama ollama list
+
+# Pull a specific model
+docker-compose exec ollama ollama pull mistral:7b
+
+# Remove a model
+docker-compose exec ollama ollama rm phi3:latest
+
+# Check current config
+curl http://localhost:5001/api/ollama/config
+
+# Test performance
+curl http://localhost:5001/api/ollama/test
+```
@@ -0,0 +1,276 @@
+# How to Check GPU Status via API
+
+## Quick Check
+
+### 1. GPU Status
+```bash
+curl http://localhost:5001/api/ollama/gpu-status | python3 -m json.tool
+```
+
+**Response:**
+```json
+{
+  "status": "success",
+  "ollama_running": true,
+  "gpu_available": true,
+  "gpu_in_use": true,
+  "gpu_details": {
+    "model": "phi3:latest",
+    "gpu_layers": 32,
+    "size": 2300000000
+  },
+  "recommendation": "✓ GPU acceleration is active!"
+}
+```
+
+### 2. Performance Test
+```bash
+curl http://localhost:5001/api/ollama/test | python3 -m json.tool
+```
+
+**Response:**
+```json
+{
+  "status": "success",
+  "duration_seconds": 3.2,
+  "performance": "Excellent (GPU likely active)",
+  "model": "phi3:latest",
+  "recommendation": "Performance is good"
+}
+```
+
+### 3. List Models
+```bash
+curl http://localhost:5001/api/ollama/models | python3 -m json.tool
+```
+
+## Using the Check Script
+
+We've created a convenient script:
+
+```bash
+./check-gpu-api.sh
+```
+
+**Output:**
+```
+==========================================
+Ollama GPU Status Check
+==========================================
+
+1. GPU Status:
+---
+{
+  "status": "success",
+  "gpu_in_use": true,
+  ...
+}
+
+2. Performance Test:
+---
+{
+  "duration_seconds": 3.2,
+  "performance": "Excellent (GPU likely active)"
+}
+
+3. Available Models:
+---
+{
+  "models": ["phi3:latest", "llama3:8b"]
+}
+
+==========================================
+Quick Summary:
+==========================================
+GPU Status: GPU Active
+Performance: 3.2s - Excellent (GPU likely active)
+```
+
+## API Endpoints
+
+### GET /api/ollama/gpu-status
+Check if GPU is being used by Ollama.
+
+**Response Fields:**
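+- `gpu_available` - `true` only when a loaded model is using the GPU (currently always equal to `gpu_in_use`; the hardware is not probed separately)
+- `gpu_in_use` - Ollama actively using GPU
+- `gpu_details` - Name, GPU layer count and size of the GPU-backed model (`null` when none is found)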
+- `recommendation` - Setup suggestions
+
+### GET /api/ollama/test
+Test Ollama performance with a sample prompt.
+
+**Response Fields:**
+- `duration_seconds` - Time taken for test
+- `performance` - Performance rating
+- `recommendation` - Performance suggestions
+
+### GET /api/ollama/models
+List all available models.
+
+**Response Fields:**
+- `models` - Array of model names
+- `current_model` - Active model from .env
+
+### GET /api/ollama/ping
+Test basic Ollama connectivity.
+
+### GET /api/ollama/config
+View current Ollama configuration.
+
+## Interpreting Results
+
+### GPU Status
+
+**✅ GPU Active:**
+```json
+{
+  "gpu_in_use": true,
+  "gpu_available": true
+}
+```
+- GPU acceleration is working
+- Expect 5-10x faster processing
+
+**❌ CPU Mode:**
+```json
+{
+  "gpu_in_use": false,
+  "gpu_available": false
+}
+```
+- Running on CPU only
+- Slower processing (15-30s per article)
+
+### Performance Ratings
+
+| Duration | Rating | Mode |
+|----------|--------|------|
+| < 5s | Excellent | GPU likely active |
+| 5-15s | Good | GPU may be active |
+| 15-30s | Fair | CPU mode |
+| > 30s | Slow | CPU mode, GPU recommended |
+
+## Troubleshooting
+
+### GPU Not Detected
+
+1. **Check if GPU compose is used:**
+   ```bash
+   docker-compose ps
+   # Should show GPU configuration
+   ```
+
+2. **Verify NVIDIA runtime:**
+   ```bash
+   docker run --rm --gpus all nvidia/cuda:11.0.3-base-ubuntu20.04 nvidia-smi
+   ```
+
+3. **Check Ollama logs:**
+   ```bash
+   docker-compose logs ollama | grep -i gpu
+   ```
+
+### Slow Performance
+
+If performance test shows > 15s:
+
+1. **Enable GPU acceleration:**
+   ```bash
+   docker-compose down
+   docker-compose -f docker-compose.yml -f docker-compose.gpu.yml up -d
+   ```
+
+2. **Verify GPU is available:**
+   ```bash
+   nvidia-smi
+   ```
+
+3. **Check model size:**
+   - Larger models = slower
+   - Try `phi3:latest` for fastest performance
+
+### Connection Errors
+
+If API returns connection errors:
+
+1. **Check backend is running:**
+   ```bash
+   docker-compose ps backend
+   ```
+
+2. **Check Ollama is running:**
+   ```bash
+   docker-compose ps ollama
+   ```
+
+3. **Restart services:**
+   ```bash
+   docker-compose restart backend ollama
+   ```
+
+## Monitoring in Production
+
+### Automated Checks
+
+Add to your monitoring:
+
+```bash
+# Check GPU status every 5 minutes (a crontab entry must be written on a single line)
+*/5 * * * * curl -s http://localhost:5001/api/ollama/gpu-status | python3 -c "import json,sys; sys.exit(0 if json.load(sys.stdin).get('gpu_in_use') else 1)"
+```
+
+### Performance Alerts
+
+Alert if performance degrades:
+
+```bash
+# Alert if response time > 20s
+DURATION=$(curl -s http://localhost:5001/api/ollama/test | \
+  python3 -c "import json,sys; print(json.load(sys.stdin).get('duration_seconds', 999))")
+
+if (( $(echo "$DURATION > 20" | bc -l) )); then
+  echo "ALERT: Ollama performance degraded: ${DURATION}s"
+fi
+```
+
+## Example: Full Health Check
+
+```bash
+#!/bin/bash
+# health-check.sh
+#
+# Exits 1 when Ollama runs in CPU mode AND the test prompt takes longer than
+# 20 seconds; exits 0 otherwise.
+
+echo "Checking Ollama Health..."
+
+# 1. GPU Status
+GPU=$(curl -s http://localhost:5001/api/ollama/gpu-status | \
+  python3 -c "import json,sys; print('GPU' if json.load(sys.stdin).get('gpu_in_use') else 'CPU')")
+
+# 2. Performance: keep a plain number of seconds so bc can compare it below.
+#    Falls back to 999 when the response carries no duration or cannot be parsed.
+PERF=$(curl -s http://localhost:5001/api/ollama/test | \
+  python3 -c "import json,sys; print(json.load(sys.stdin).get('duration_seconds', 999))")
+PERF=${PERF:-999}
+
+# 3. Models
+MODELS=$(curl -s http://localhost:5001/api/ollama/models | \
+  python3 -c "import json,sys; print(len(json.load(sys.stdin).get('models', [])))")
+
+echo "Mode: $GPU"
+echo "Performance: ${PERF}s"
+echo "Models: $MODELS"
+
+# Exit with error if CPU mode and slow
+if [ "$GPU" = "CPU" ] && (( $(echo "$PERF > 20" | bc -l) )); then
+  echo "WARNING: Running in CPU mode with slow performance"
+  exit 1
+fi
+
+echo "✓ Health check passed"
+```
+
+## Related Documentation
+
+- [GPU_SETUP.md](GPU_SETUP.md) - GPU setup guide
+- [OLLAMA_SETUP.md](OLLAMA_SETUP.md) - Ollama configuration
+- [CHANGING_AI_MODEL.md](CHANGING_AI_MODEL.md) - Model switching guide
@@ -0,0 +1,44 @@
+#!/bin/bash
+# Pull Ollama model from .env file
+
+set -e
+
+# Load OLLAMA_MODEL from .env
+if [ -f backend/.env ]; then
+    export $(grep -v '^#' backend/.env | grep OLLAMA_MODEL | xargs)
+else
+    echo "Error: backend/.env file not found"
+    exit 1
+fi
+
+# Default to phi3:latest if not set
+MODEL=${OLLAMA_MODEL:-phi3:latest}
+
+echo "=========================================="
+echo "Pulling Ollama Model: $MODEL"
+echo "=========================================="
+echo ""
+
+# Check if Ollama container is running
+if ! docker-compose ps ollama | grep -q "Up"; then
+    echo "Error: Ollama container is not running"
+    echo "Start it with: docker-compose up -d ollama"
+    exit 1
+fi
+
+echo "Pulling model via Ollama API..."
+echo ""
+
+# Pull the model
+docker-compose exec -T ollama ollama pull "$MODEL"
+
+echo ""
+echo "=========================================="
+echo "✓ Model $MODEL pulled successfully!"
+echo "=========================================="
+echo ""
+echo "Verify with:"
+echo "  docker-compose exec ollama ollama list"
+echo ""
+echo "Test with:"
+echo "  curl http://localhost:5001/api/ollama/test"
@@ -0,0 +1,57 @@
+#!/bin/sh
+# Ollama Model Setup Script
+# Checks if model exists and downloads if needed
+
+set -e
+
+MODEL="${OLLAMA_MODEL:-phi3:latest}"
+
+echo "========================================"
+echo "Ollama Model Setup"
+echo "Target model: $MODEL"
+echo "========================================"
+echo ""
+
+# Wait for Ollama to be ready
+echo "Waiting for Ollama service..."
+sleep 3
+
+# Check if model exists
+echo "Checking if model exists..."
+MODELS=$(curl -s http://ollama:11434/api/tags 2>/dev/null || echo "")
+
+if [ -z "$MODELS" ]; then
+    echo "⚠ Warning: Could not connect to Ollama"
+    echo "Attempting to pull model anyway..."
+    curl -X POST http://ollama:11434/api/pull -d "{\"name\":\"$MODEL\"}"
+    echo ""
+    echo "✓ Model pull initiated: $MODEL"
+    exit 0
+fi
+
+# Check if our model is in the list
+if echo "$MODELS" | grep -q "\"$MODEL\""; then
+    echo "✓ Model already exists: $MODEL"
+    echo "Skipping download."
+    echo ""
+    echo "Available models:"
+    echo "$MODELS" | grep -o '"name":"[^"]*"' | cut -d'"' -f4 | sed 's/^/  - /'
+else
+    echo "⬇ Model not found, downloading: $MODEL"
+    echo "This may take 2-10 minutes depending on model size..."
+    echo ""
+    
+    # Pull the model
+    curl -X POST http://ollama:11434/api/pull -d "{\"name\":\"$MODEL\"}"
+    
+    echo ""
+    echo "✓ Model download initiated: $MODEL"
+    echo ""
+    echo "Monitor progress with:"
+    echo "  docker-compose logs -f ollama"
+fi
+
+echo ""
+echo "========================================"
+echo "Setup complete!"
+echo "========================================"