update
@@ -124,6 +124,8 @@ That's it! The system will automatically:
 
 📖 **For detailed Ollama setup & GPU acceleration:** See [docs/OLLAMA_SETUP.md](docs/OLLAMA_SETUP.md)
 
+💡 **To change AI model:** Edit `OLLAMA_MODEL` in `.env`, then run `./pull-ollama-model.sh`. See [docs/CHANGING_AI_MODEL.md](docs/CHANGING_AI_MODEL.md)
+
 ## ⚙️ Configuration
 
 Edit `backend/.env`:

@@ -156,3 +156,163 @@ def get_ollama_models():
             'enabled': Config.OLLAMA_ENABLED
         }
     }), 500
+
+
+@ollama_bp.route('/api/ollama/gpu-status', methods=['GET'])
+def get_gpu_status():
+    """Check if Ollama is using GPU acceleration"""
+    import requests
+
+    try:
+        if not Config.OLLAMA_ENABLED:
+            return jsonify({
+                'status': 'disabled',
+                'message': 'Ollama is not enabled',
+                'gpu_available': False,
+                'gpu_in_use': False
+            }), 200
+
+        # Get Ollama process info
+        try:
+            response = requests.get(
+                f"{Config.OLLAMA_BASE_URL}/api/ps",
+                timeout=5
+            )
+
+            if response.status_code == 200:
+                ps_data = response.json()
+
+                # Check if any models are loaded
+                models_loaded = ps_data.get('models', [])
+
+                gpu_info = {
+                    'status': 'success',
+                    'ollama_running': True,
+                    'models_loaded': len(models_loaded),
+                    'gpu_available': False,
+                    'gpu_in_use': False,
+                    'gpu_details': None
+                }
+
+                # Check for GPU usage in loaded models
+                for model in models_loaded:
+                    if 'gpu' in str(model).lower() or model.get('gpu_layers', 0) > 0:
+                        gpu_info['gpu_in_use'] = True
+                        gpu_info['gpu_available'] = True
+                        gpu_info['gpu_details'] = {
+                            'model': model.get('name', 'unknown'),
+                            'gpu_layers': model.get('gpu_layers', 0),
+                            'size': model.get('size', 0)
+                        }
+                        break
+
+                # Try to get system info
+                try:
+                    tags_response = requests.get(
+                        f"{Config.OLLAMA_BASE_URL}/api/tags",
+                        timeout=5
+                    )
+                    if tags_response.status_code == 200:
+                        tags_data = tags_response.json()
+                        gpu_info['available_models'] = [m.get('name') for m in tags_data.get('models', [])]
+                except Exception:
+                    pass
+
+                # Add recommendation
+                if not gpu_info['gpu_in_use']:
+                    gpu_info['recommendation'] = (
+                        "GPU not detected. To enable GPU acceleration:\n"
+                        "1. Ensure NVIDIA GPU is available\n"
+                        "2. Install nvidia-docker2\n"
+                        "3. Use: docker-compose -f docker-compose.yml -f docker-compose.gpu.yml up -d\n"
+                        "4. See docs/GPU_SETUP.md for details"
+                    )
+                else:
+                    gpu_info['recommendation'] = "✓ GPU acceleration is active!"
+
+                return jsonify(gpu_info), 200
+            else:
+                return jsonify({
+                    'status': 'error',
+                    'message': f'Ollama API returned status {response.status_code}',
+                    'ollama_running': False,
+                    'gpu_available': False,
+                    'gpu_in_use': False
+                }), 500
+
+        except requests.exceptions.ConnectionError:
+            return jsonify({
+                'status': 'error',
+                'message': f'Cannot connect to Ollama at {Config.OLLAMA_BASE_URL}',
+                'ollama_running': False,
+                'gpu_available': False,
+                'gpu_in_use': False,
+                'troubleshooting': {
+                    'check_container': 'docker-compose ps ollama',
+                    'check_logs': 'docker-compose logs ollama',
+                    'restart': 'docker-compose restart ollama'
+                }
+            }), 500
+
+    except Exception as e:
+        return jsonify({
+            'status': 'error',
+            'message': f'Error checking GPU status: {str(e)}',
+            'gpu_available': False,
+            'gpu_in_use': False
+        }), 500
+
+
+@ollama_bp.route('/api/ollama/test', methods=['GET'])
+def test_ollama_performance():
+    """Test Ollama performance and measure response time"""
+    import time
+
+    try:
+        if not Config.OLLAMA_ENABLED:
+            return jsonify({
+                'status': 'disabled',
+                'message': 'Ollama is not enabled'
+            }), 200
+
+        # Test prompt
+        test_prompt = "Summarize this in 20 words: Munich is the capital of Bavaria, Germany. It is known for Oktoberfest, BMW, and beautiful architecture."
+
+        start_time = time.time()
+        response_text, error_message = call_ollama(test_prompt, "You are a helpful assistant.")
+        duration = time.time() - start_time
+
+        if response_text:
+            # Estimate performance from the wall-clock duration
+            if duration < 5:
+                performance = "Excellent (GPU likely active)"
+            elif duration < 15:
+                performance = "Good (GPU may be active)"
+            elif duration < 30:
+                performance = "Fair (CPU mode)"
+            else:
+                performance = "Slow (CPU mode, consider GPU)"
+
+            return jsonify({
+                'status': 'success',
+                'response': response_text,
+                'duration_seconds': round(duration, 2),
+                'performance': performance,
+                'model': Config.OLLAMA_MODEL,
+                'recommendation': (
+                    "GPU acceleration recommended" if duration > 15
+                    else "Performance is good"
+                )
+            }), 200
+        else:
+            return jsonify({
+                'status': 'error',
+                'message': error_message or 'Failed to get response',
+                'duration_seconds': round(duration, 2)
+            }), 500
+
+    except Exception as e:
+        return jsonify({
+            'status': 'error',
+            'message': f'Error testing Ollama: {str(e)}'
+        }), 500

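One caveat on the GPU detection loop above: the `gpu_layers` key is an assumption about the shape of Ollama's `/api/ps` payload. Recent Ollama builds report per-model VRAM usage as `size_vram`, so a stricter variant of the check (a sketch, not part of this commit) could look like:

```python
# Hedged alternative to the detection loop above. Assumption: the Ollama
# build in use reports per-model VRAM usage in /api/ps as "size_vram".
for model in models_loaded:
    if model.get('size_vram', 0) > 0:  # any VRAM allocated => model runs on the GPU
        gpu_info['gpu_in_use'] = True
        gpu_info['gpu_available'] = True
        break
```
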
check-gpu-api.sh (new executable file, 46 lines)
#!/bin/bash
# Check GPU status via API

echo "=========================================="
echo "Ollama GPU Status Check"
echo "=========================================="
echo ""

# Check GPU status
echo "1. GPU Status:"
echo "---"
curl -s http://localhost:5001/api/ollama/gpu-status | python3 -m json.tool
echo ""
echo ""

# Test performance
echo "2. Performance Test:"
echo "---"
curl -s http://localhost:5001/api/ollama/test | python3 -m json.tool
echo ""
echo ""

# List models
echo "3. Available Models:"
echo "---"
curl -s http://localhost:5001/api/ollama/models | python3 -m json.tool
echo ""
echo ""

echo "=========================================="
echo "Quick Summary:"
echo "=========================================="

# Extract key info
GPU_STATUS=$(curl -s http://localhost:5001/api/ollama/gpu-status | python3 -c "import json,sys; data=json.load(sys.stdin); print('GPU Active' if data.get('gpu_in_use') else 'CPU Mode')" 2>/dev/null || echo "Error")
PERF=$(curl -s http://localhost:5001/api/ollama/test | python3 -c "import json,sys; data=json.load(sys.stdin); print(f\"{data.get('duration_seconds', 'N/A')}s - {data.get('performance', 'N/A')}\")" 2>/dev/null || echo "Error")

echo "GPU Status: $GPU_STATUS"
echo "Performance: $PERF"
echo ""

if [ "$GPU_STATUS" = "CPU Mode" ]; then
    echo "💡 TIP: Enable GPU for 5-10x faster processing:"
    echo "   docker-compose -f docker-compose.yml -f docker-compose.gpu.yml up -d"
    echo "   See docs/GPU_SETUP.md for details"
fi

@@ -52,17 +52,10 @@ services:
       - munich-news-network
     env_file:
       - backend/.env
-    entrypoint: /bin/sh
-    command: >
-      -c "
-      echo 'Waiting for Ollama service to be ready...' &&
-      sleep 5 &&
-      echo 'Pulling model: ${OLLAMA_MODEL:-phi3:latest}' &&
-      curl -X POST http://ollama:11434/api/pull -d '{\"name\":\"${OLLAMA_MODEL:-phi3:latest}\"}' &&
-      echo '' &&
-      echo 'Model ${OLLAMA_MODEL:-phi3:latest} pull initiated!'
-      "
-    restart: "no"
+    volumes:
+      - ./scripts/setup-ollama-model.sh:/setup-ollama-model.sh:ro
+    command: sh /setup-ollama-model.sh
+    restart: on-failure
 
   # MongoDB Database (Internal only - not exposed to host)
   mongodb:

@@ -15,6 +15,21 @@ OLLAMA_MODEL=phi3:latest
 
 ## ✅ How to Change the Model
 
+### Important Note
+
+✅ **The model IS automatically checked and downloaded on startup**
+
+The `ollama-setup` service runs on every `docker-compose up` and:
+- Checks if the model specified in `.env` exists
+- Downloads it if missing
+- Skips download if already present
+
+This means you can simply:
+1. Change `OLLAMA_MODEL` in `.env`
+2. Run `docker-compose up -d`
+3. Wait for download (if needed)
+4. Done!
+
 ### Step 1: Update .env File
 
 Edit `backend/.env` and change the `OLLAMA_MODEL` value:

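The exists-or-pull check that `ollama-setup` performs can also be scripted outside the container; a minimal Python sketch, assuming the Ollama API is reachable from where you run it (inside the compose network it is `http://ollama:11434`):

```python
import requests

OLLAMA_URL = "http://ollama:11434"  # assumption: adjust to where the API is reachable
MODEL = "phi3:latest"

# Ask Ollama which models are present, then pull only if ours is missing
tags = requests.get(f"{OLLAMA_URL}/api/tags", timeout=5).json()
if not any(m.get("name") == MODEL for m in tags.get("models", [])):
    # "stream": False makes the pull block until the download completes
    requests.post(f"{OLLAMA_URL}/api/pull",
                  json={"name": MODEL, "stream": False}, timeout=None)
```
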
@@ -30,22 +45,38 @@ OLLAMA_MODEL=mistral:7b
 OLLAMA_MODEL=your-custom-model:latest
 ```
 
-### Step 2: Restart Services
+### Step 2: Restart Services (Model Auto-Downloads)
 
-The model will be automatically downloaded on startup:
-
+**Option A: Simple restart (Recommended)**
 ```bash
-# Stop services
-docker-compose down
-
-# Start services (model will be pulled automatically)
+# Restart all services
 docker-compose up -d
 
-# Watch the download progress
+# Watch the model check/download
 docker-compose logs -f ollama-setup
 ```
 
-**Note:** First startup with a new model takes 2-10 minutes depending on model size.
+The `ollama-setup` service will:
+- Check if the new model exists
+- Download it if missing (2-10 minutes)
+- Skip download if already present
+
+**Option B: Manual pull (if you want control)**
+```bash
+# Pull the model manually first
+./pull-ollama-model.sh
+
+# Then restart
+docker-compose restart crawler backend
+```
+
+**Option C: Full restart**
+```bash
+docker-compose down
+docker-compose up -d
+```
+
+**Note:** Model download takes 2-10 minutes depending on model size.
 
 ## Supported Models
 

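To watch a pull programmatically rather than tailing `docker-compose logs`, a small sketch against Ollama's streaming `/api/pull` endpoint (assuming, again, that the API is reachable from where you run it; the model name is illustrative):

```python
import json
import requests

OLLAMA_URL = "http://ollama:11434"  # assumption: adjust to where the API is reachable

# Ollama streams one JSON status object per line while a pull is running
with requests.post(f"{OLLAMA_URL}/api/pull",
                   json={"name": "mistral:7b"},
                   stream=True, timeout=None) as resp:
    for line in resp.iter_lines():
        if line:
            status = json.loads(line)
            print(status.get("status"), status.get("completed"), status.get("total"))
```
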
@@ -264,3 +295,68 @@ A: 5-10GB for small models, 50GB+ for large models. Plan accordingly.
 - [OLLAMA_SETUP.md](OLLAMA_SETUP.md) - Ollama installation & configuration
 - [GPU_SETUP.md](GPU_SETUP.md) - GPU acceleration setup
 - [AI_NEWS_AGGREGATION.md](AI_NEWS_AGGREGATION.md) - AI features overview
+
+## Complete Example: Changing from phi3 to llama3
+
+```bash
+# 1. Check current model
+curl -s http://localhost:5001/api/ollama/models | python3 -m json.tool
+# Shows: "current_model": "phi3:latest"
+
+# 2. Update .env file
+# Edit backend/.env and change:
+# OLLAMA_MODEL=llama3:8b
+
+# 3. Pull the new model
+./pull-ollama-model.sh
+# Or manually: docker-compose exec ollama ollama pull llama3:8b
+
+# 4. Restart services
+docker-compose restart crawler backend
+
+# 5. Verify the change
+curl -s http://localhost:5001/api/ollama/models | python3 -m json.tool
+# Shows: "current_model": "llama3:8b"
+
+# 6. Test performance
+curl -s http://localhost:5001/api/ollama/test | python3 -m json.tool
+# Should show improved quality with llama3
+```
+
+## Quick Reference
+
+### Change Model Workflow
+
+```bash
+# 1. Edit .env
+vim backend/.env  # Change OLLAMA_MODEL
+
+# 2. Pull model
+./pull-ollama-model.sh
+
+# 3. Restart
+docker-compose restart crawler backend
+
+# 4. Verify
+curl http://localhost:5001/api/ollama/test
+```
+
+### Common Commands
+
+```bash
+# List downloaded models
+docker-compose exec ollama ollama list
+
+# Pull a specific model
+docker-compose exec ollama ollama pull mistral:7b
+
+# Remove a model
+docker-compose exec ollama ollama rm phi3:latest
+
+# Check current config
+curl http://localhost:5001/api/ollama/config
+
+# Test performance
+curl http://localhost:5001/api/ollama/test
+```

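For scripted verification that a model switch took effect (as in step 5 of the complete example above), a small Python sketch; the port and the `current_model` field follow the examples in this guide:

```python
import requests

resp = requests.get("http://localhost:5001/api/ollama/models", timeout=10)
data = resp.json()

expected = "llama3:8b"  # the model you just set in backend/.env
assert data.get("current_model") == expected, data
print("Active model verified:", data["current_model"])
```
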
docs/CHECK_GPU_STATUS.md (new file, 276 lines)
# How to Check GPU Status via API

## Quick Check

### 1. GPU Status

```bash
curl http://localhost:5001/api/ollama/gpu-status | python3 -m json.tool
```

**Response:**
```json
{
    "status": "success",
    "ollama_running": true,
    "gpu_available": true,
    "gpu_in_use": true,
    "gpu_details": {
        "model": "phi3:latest",
        "gpu_layers": 32,
        "size": 2300000000
    },
    "recommendation": "✓ GPU acceleration is active!"
}
```

### 2. Performance Test

```bash
curl http://localhost:5001/api/ollama/test | python3 -m json.tool
```

**Response:**
```json
{
    "status": "success",
    "duration_seconds": 3.2,
    "performance": "Excellent (GPU likely active)",
    "model": "phi3:latest",
    "recommendation": "Performance is good"
}
```

### 3. List Models

```bash
curl http://localhost:5001/api/ollama/models | python3 -m json.tool
```

## Using the Check Script

We've created a convenient script:

```bash
./check-gpu-api.sh
```

**Output:**
```
==========================================
Ollama GPU Status Check
==========================================

1. GPU Status:
---
{
    "status": "success",
    "gpu_in_use": true,
    ...
}

2. Performance Test:
---
{
    "duration_seconds": 3.2,
    "performance": "Excellent (GPU likely active)"
}

3. Available Models:
---
{
    "models": ["phi3:latest", "llama3:8b"]
}

==========================================
Quick Summary:
==========================================
GPU Status: GPU Active
Performance: 3.2s - Excellent (GPU likely active)
```

## API Endpoints

### GET /api/ollama/gpu-status

Check whether Ollama is using the GPU.

**Response Fields:**
- `gpu_available` - GPU hardware detected
- `gpu_in_use` - Ollama is actively using the GPU
- `gpu_details` - GPU configuration details
- `recommendation` - Setup suggestions

### GET /api/ollama/test

Test Ollama performance with a sample prompt.

**Response Fields:**
- `duration_seconds` - Time taken for the test
- `performance` - Performance rating
- `recommendation` - Performance suggestions

### GET /api/ollama/models

List all available models.

**Response Fields:**
- `models` - Array of model names
- `current_model` - Active model from `.env`

### GET /api/ollama/ping

Test basic Ollama connectivity.

### GET /api/ollama/config

View the current Ollama configuration.

## Interpreting Results

### GPU Status

**✅ GPU Active:**
```json
{
    "gpu_in_use": true,
    "gpu_available": true
}
```
- GPU acceleration is working
- Expect 5-10x faster processing

**❌ CPU Mode:**
```json
{
    "gpu_in_use": false,
    "gpu_available": false
}
```
- Running on CPU only
- Slower processing (15-30s per article)

### Performance Ratings

| Duration | Rating    | Mode                      |
|----------|-----------|---------------------------|
| < 5s     | Excellent | GPU likely active         |
| 5-15s    | Good      | GPU may be active         |
| 15-30s   | Fair      | CPU mode                  |
| > 30s    | Slow      | CPU mode, GPU recommended |

## Troubleshooting

### GPU Not Detected

1. **Check that the GPU compose file is in use:**
   ```bash
   docker-compose ps
   # Should show the GPU configuration
   ```

2. **Verify the NVIDIA runtime:**
   ```bash
   docker run --rm --gpus all nvidia/cuda:11.0-base nvidia-smi
   ```

3. **Check the Ollama logs:**
   ```bash
   docker-compose logs ollama | grep -i gpu
   ```

### Slow Performance

If the performance test shows > 15s:

1. **Enable GPU acceleration:**
   ```bash
   docker-compose down
   docker-compose -f docker-compose.yml -f docker-compose.gpu.yml up -d
   ```

2. **Verify the GPU is available:**
   ```bash
   nvidia-smi
   ```

3. **Check the model size:**
   - Larger models = slower
   - Try `phi3:latest` for the fastest performance

### Connection Errors

If the API returns connection errors:

1. **Check that the backend is running:**
   ```bash
   docker-compose ps backend
   ```

2. **Check that Ollama is running:**
   ```bash
   docker-compose ps ollama
   ```

3. **Restart the services:**
   ```bash
   docker-compose restart backend ollama
   ```

## Monitoring in Production

### Automated Checks

Add to your monitoring (crontab syntax):

```bash
# Check GPU status every 5 minutes; non-zero exit when on CPU
*/5 * * * * curl -s http://localhost:5001/api/ollama/gpu-status | \
  python3 -c "import json,sys; data=json.load(sys.stdin); \
  sys.exit(0 if data.get('gpu_in_use') else 1)"
```

### Performance Alerts

Alert if performance degrades:

```bash
# Alert if response time > 20s
DURATION=$(curl -s http://localhost:5001/api/ollama/test | \
  python3 -c "import json,sys; print(json.load(sys.stdin).get('duration_seconds', 999))")

if (( $(echo "$DURATION > 20" | bc -l) )); then
    echo "ALERT: Ollama performance degraded: ${DURATION}s"
fi
```
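The same two checks combined in one Python sketch, for setups where cron plus `bc` is awkward; it assumes the backend port mapping used throughout this guide:

```python
import sys
import requests

BASE = "http://localhost:5001"  # assumption: backend mapped as in the examples above

gpu = requests.get(f"{BASE}/api/ollama/gpu-status", timeout=10).json()
perf = requests.get(f"{BASE}/api/ollama/test", timeout=120).json()
duration = perf.get("duration_seconds", 999)

print(f"gpu_in_use={gpu.get('gpu_in_use')} duration={duration}s")

# Fail the check when running on CPU *and* responses are slow
if not gpu.get("gpu_in_use") and duration > 20:
    sys.exit(1)
```
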
## Example: Full Health Check

```bash
#!/bin/bash
# health-check.sh

echo "Checking Ollama Health..."

# 1. GPU status
GPU=$(curl -s http://localhost:5001/api/ollama/gpu-status | \
  python3 -c "import json,sys; print('GPU' if json.load(sys.stdin).get('gpu_in_use') else 'CPU')")

# 2. Performance (bare number, so it can be compared below)
PERF=$(curl -s http://localhost:5001/api/ollama/test | \
  python3 -c "import json,sys; print(json.load(sys.stdin).get('duration_seconds', 999))")

# 3. Models
MODELS=$(curl -s http://localhost:5001/api/ollama/models | \
  python3 -c "import json,sys; print(len(json.load(sys.stdin).get('models', [])))")

echo "Mode: $GPU"
echo "Performance: ${PERF}s"
echo "Models: $MODELS"

# Exit with an error if in CPU mode and slow
if [ "$GPU" = "CPU" ] && (( $(echo "$PERF > 20" | bc -l) )); then
    echo "WARNING: Running in CPU mode with slow performance"
    exit 1
fi

echo "✓ Health check passed"
```

## Related Documentation

- [GPU_SETUP.md](GPU_SETUP.md) - GPU setup guide
- [OLLAMA_SETUP.md](OLLAMA_SETUP.md) - Ollama configuration
- [CHANGING_AI_MODEL.md](CHANGING_AI_MODEL.md) - Model switching guide

pull-ollama-model.sh (new executable file, 44 lines)
#!/bin/bash
# Pull the Ollama model named in the .env file

set -e

# Load OLLAMA_MODEL from .env (simple KEY=VALUE parsing; values with
# spaces would need proper quoting)
if [ -f backend/.env ]; then
    export $(grep -v '^#' backend/.env | grep OLLAMA_MODEL | xargs)
else
    echo "Error: backend/.env file not found"
    exit 1
fi

# Default to phi3:latest if not set
MODEL=${OLLAMA_MODEL:-phi3:latest}

echo "=========================================="
echo "Pulling Ollama Model: $MODEL"
echo "=========================================="
echo ""

# Check that the Ollama container is running
if ! docker-compose ps ollama | grep -q "Up"; then
    echo "Error: Ollama container is not running"
    echo "Start it with: docker-compose up -d ollama"
    exit 1
fi

echo "Pulling model via the Ollama CLI in the container..."
echo ""

# Pull the model
docker-compose exec -T ollama ollama pull "$MODEL"

echo ""
echo "=========================================="
echo "✓ Model $MODEL pulled successfully!"
echo "=========================================="
echo ""
echo "Verify with:"
echo "  docker-compose exec ollama ollama list"
echo ""
echo "Test with:"
echo "  curl http://localhost:5001/api/ollama/test"

scripts/setup-ollama-model.sh (new executable file, 57 lines)
#!/bin/sh
# Ollama Model Setup Script
# Checks if the model exists and downloads it if needed

set -e

MODEL="${OLLAMA_MODEL:-phi3:latest}"

echo "========================================"
echo "Ollama Model Setup"
echo "Target model: $MODEL"
echo "========================================"
echo ""

# Wait for Ollama to be ready
echo "Waiting for Ollama service..."
sleep 3

# Check if the model exists
echo "Checking if model exists..."
MODELS=$(curl -s http://ollama:11434/api/tags 2>/dev/null || echo "")

if [ -z "$MODELS" ]; then
    echo "⚠ Warning: Could not connect to Ollama"
    echo "Attempting to pull model anyway..."
    curl -X POST http://ollama:11434/api/pull -d "{\"name\":\"$MODEL\"}"
    echo ""
    echo "✓ Model pull initiated: $MODEL"
    exit 0
fi

# Check if our model is in the list
if echo "$MODELS" | grep -q "\"$MODEL\""; then
    echo "✓ Model already exists: $MODEL"
    echo "Skipping download."
    echo ""
    echo "Available models:"
    echo "$MODELS" | grep -o '"name":"[^"]*"' | cut -d'"' -f4 | sed 's/^/  - /'
else
    echo "⬇ Model not found, downloading: $MODEL"
    echo "This may take 2-10 minutes depending on model size..."
    echo ""

    # Pull the model
    curl -X POST http://ollama:11434/api/pull -d "{\"name\":\"$MODEL\"}"

    echo ""
    echo "✓ Model download initiated: $MODEL"
    echo ""
    echo "Monitor progress with:"
    echo "  docker-compose logs -f ollama"
fi

echo ""
echo "========================================"
echo "Setup complete!"
echo "========================================"