From d59372d1d60e886b8b5f333c3eae1fa68cc66bbc Mon Sep 17 00:00:00 2001
From: Dongho Kim
Date: Wed, 12 Nov 2025 11:55:53 +0100
Subject: [PATCH] Add Ollama GPU-status and performance endpoints, model setup
 scripts, and docs

---
 README.md                       |   2 +
 backend/routes/ollama_routes.py | 160 ++++++++++++++++++
 check-gpu-api.sh                |  46 ++++++
 docker-compose.yml              |  15 +-
 docs/CHANGING_AI_MODEL.md       | 114 +++++++++++--
 docs/CHECK_GPU_STATUS.md        | 276 ++++++++++++++++++++++++++++++++
 pull-ollama-model.sh            |  44 +++++
 scripts/setup-ollama-model.sh   |  57 +++++++
 8 files changed, 694 insertions(+), 20 deletions(-)
 create mode 100755 check-gpu-api.sh
 create mode 100644 docs/CHECK_GPU_STATUS.md
 create mode 100755 pull-ollama-model.sh
 create mode 100755 scripts/setup-ollama-model.sh

diff --git a/README.md b/README.md
index f6b6f50..6106eab 100644
--- a/README.md
+++ b/README.md
@@ -124,6 +124,8 @@ That's it! The system will automatically:
 
 📖 **For detailed Ollama setup & GPU acceleration:** See [docs/OLLAMA_SETUP.md](docs/OLLAMA_SETUP.md)
 
+💡 **To change the AI model:** Edit `OLLAMA_MODEL` in `backend/.env`, then run `./pull-ollama-model.sh`. See [docs/CHANGING_AI_MODEL.md](docs/CHANGING_AI_MODEL.md)
+
 ## ⚙️ Configuration
 
 Edit `backend/.env`:
diff --git a/backend/routes/ollama_routes.py b/backend/routes/ollama_routes.py
index 84628d8..442f893 100644
--- a/backend/routes/ollama_routes.py
+++ b/backend/routes/ollama_routes.py
@@ -156,3 +156,163 @@ def get_ollama_models():
             'enabled': Config.OLLAMA_ENABLED
         }
     }), 500
+
+
+@ollama_bp.route('/api/ollama/gpu-status', methods=['GET'])
+def get_gpu_status():
+    """Check if Ollama is using GPU acceleration"""
+    import requests
+
+    try:
+        if not Config.OLLAMA_ENABLED:
+            return jsonify({
+                'status': 'disabled',
+                'message': 'Ollama is not enabled',
+                'gpu_available': False,
+                'gpu_in_use': False
+            }), 200
+
+        # Get Ollama process info
+        try:
+            response = requests.get(
+                f"{Config.OLLAMA_BASE_URL}/api/ps",
+                timeout=5
+            )
+
+            if response.status_code == 200:
+                ps_data = response.json()
+
+                # Check if any models are loaded
+                models_loaded = ps_data.get('models', [])
+
+                gpu_info = {
+                    'status': 'success',
+                    'ollama_running': True,
+                    'models_loaded': len(models_loaded),
+                    'gpu_available': False,
+                    'gpu_in_use': False,
+                    'gpu_details': None
+                }
+
+                # Check for GPU usage in loaded models
+                # (heuristic: the /api/ps response schema varies across Ollama
+                # versions, so also fall back to substring matching)
+                for model in models_loaded:
+                    if 'gpu' in str(model).lower() or model.get('gpu_layers', 0) > 0:
+                        gpu_info['gpu_in_use'] = True
+                        gpu_info['gpu_available'] = True
+                        gpu_info['gpu_details'] = {
+                            'model': model.get('name', 'unknown'),
+                            'gpu_layers': model.get('gpu_layers', 0),
+                            'size': model.get('size', 0)
+                        }
+                        break
+
+                # Try to get system info
+                try:
+                    tags_response = requests.get(
+                        f"{Config.OLLAMA_BASE_URL}/api/tags",
+                        timeout=5
+                    )
+                    if tags_response.status_code == 200:
+                        tags_data = tags_response.json()
+                        gpu_info['available_models'] = [m.get('name') for m in tags_data.get('models', [])]
+                except (requests.RequestException, ValueError):
+                    pass
+
+                # Add recommendation
+                if not gpu_info['gpu_in_use']:
+                    gpu_info['recommendation'] = (
+                        "GPU not detected. To enable GPU acceleration:\n"
+                        "1. Ensure an NVIDIA GPU is available\n"
+                        "2. Install nvidia-docker2\n"
+                        "3. Use: docker-compose -f docker-compose.yml -f docker-compose.gpu.yml up -d\n"
+                        "4. See docs/GPU_SETUP.md for details"
+                    )
+                else:
+                    gpu_info['recommendation'] = "✓ GPU acceleration is active!"
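+
+                # gpu_info now carries the status flags, per-model GPU details
+                # (if any), and the human-readable recommendation built above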
+ + return jsonify(gpu_info), 200 + else: + return jsonify({ + 'status': 'error', + 'message': f'Ollama API returned status {response.status_code}', + 'ollama_running': False, + 'gpu_available': False, + 'gpu_in_use': False + }), 500 + + except requests.exceptions.ConnectionError: + return jsonify({ + 'status': 'error', + 'message': f'Cannot connect to Ollama at {Config.OLLAMA_BASE_URL}', + 'ollama_running': False, + 'gpu_available': False, + 'gpu_in_use': False, + 'troubleshooting': { + 'check_container': 'docker-compose ps ollama', + 'check_logs': 'docker-compose logs ollama', + 'restart': 'docker-compose restart ollama' + } + }), 500 + + except Exception as e: + return jsonify({ + 'status': 'error', + 'message': f'Error checking GPU status: {str(e)}', + 'gpu_available': False, + 'gpu_in_use': False + }), 500 + + +@ollama_bp.route('/api/ollama/test', methods=['GET']) +def test_ollama_performance(): + """Test Ollama performance and measure response time""" + import time + + try: + if not Config.OLLAMA_ENABLED: + return jsonify({ + 'status': 'disabled', + 'message': 'Ollama is not enabled' + }), 200 + + # Test prompt + test_prompt = "Summarize this in 20 words: Munich is the capital of Bavaria, Germany. It is known for Oktoberfest, BMW, and beautiful architecture." + + start_time = time.time() + response_text, error_message = call_ollama(test_prompt, "You are a helpful assistant.") + duration = time.time() - start_time + + if response_text: + # Estimate performance + if duration < 5: + performance = "Excellent (GPU likely active)" + elif duration < 15: + performance = "Good (GPU may be active)" + elif duration < 30: + performance = "Fair (CPU mode)" + else: + performance = "Slow (CPU mode, consider GPU)" + + return jsonify({ + 'status': 'success', + 'response': response_text, + 'duration_seconds': round(duration, 2), + 'performance': performance, + 'model': Config.OLLAMA_MODEL, + 'recommendation': ( + "GPU acceleration recommended" if duration > 15 + else "Performance is good" + ) + }), 200 + else: + return jsonify({ + 'status': 'error', + 'message': error_message or 'Failed to get response', + 'duration_seconds': round(duration, 2) + }), 500 + + except Exception as e: + return jsonify({ + 'status': 'error', + 'message': f'Error testing Ollama: {str(e)}' + }), 500 diff --git a/check-gpu-api.sh b/check-gpu-api.sh new file mode 100755 index 0000000..b6d3b9f --- /dev/null +++ b/check-gpu-api.sh @@ -0,0 +1,46 @@ +#!/bin/bash +# Check GPU status via API + +echo "==========================================" +echo "Ollama GPU Status Check" +echo "==========================================" +echo "" + +# Check GPU status +echo "1. GPU Status:" +echo "---" +curl -s http://localhost:5001/api/ollama/gpu-status | python3 -m json.tool +echo "" +echo "" + +# Test performance +echo "2. Performance Test:" +echo "---" +curl -s http://localhost:5001/api/ollama/test | python3 -m json.tool +echo "" +echo "" + +# List models +echo "3. 
Available Models:" +echo "---" +curl -s http://localhost:5001/api/ollama/models | python3 -m json.tool +echo "" +echo "" + +echo "==========================================" +echo "Quick Summary:" +echo "==========================================" + +# Extract key info +GPU_STATUS=$(curl -s http://localhost:5001/api/ollama/gpu-status | python3 -c "import json,sys; data=json.load(sys.stdin); print('GPU Active' if data.get('gpu_in_use') else 'CPU Mode')" 2>/dev/null || echo "Error") +PERF=$(curl -s http://localhost:5001/api/ollama/test | python3 -c "import json,sys; data=json.load(sys.stdin); print(f\"{data.get('duration_seconds', 'N/A')}s - {data.get('performance', 'N/A')}\")" 2>/dev/null || echo "Error") + +echo "GPU Status: $GPU_STATUS" +echo "Performance: $PERF" +echo "" + +if [ "$GPU_STATUS" = "CPU Mode" ]; then + echo "💡 TIP: Enable GPU for 5-10x faster processing:" + echo " docker-compose -f docker-compose.yml -f docker-compose.gpu.yml up -d" + echo " See docs/GPU_SETUP.md for details" +fi diff --git a/docker-compose.yml b/docker-compose.yml index d42fd45..f5b095f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -52,17 +52,10 @@ services: - munich-news-network env_file: - backend/.env - entrypoint: /bin/sh - command: > - -c " - echo 'Waiting for Ollama service to be ready...' && - sleep 5 && - echo 'Pulling model: ${OLLAMA_MODEL:-phi3:latest}' && - curl -X POST http://ollama:11434/api/pull -d '{\"name\":\"${OLLAMA_MODEL:-phi3:latest}\"}' && - echo '' && - echo 'Model ${OLLAMA_MODEL:-phi3:latest} pull initiated!' - " - restart: "no" + volumes: + - ./scripts/setup-ollama-model.sh:/setup-ollama-model.sh:ro + command: sh /setup-ollama-model.sh + restart: on-failure # MongoDB Database (Internal only - not exposed to host) mongodb: diff --git a/docs/CHANGING_AI_MODEL.md b/docs/CHANGING_AI_MODEL.md index 187ee77..592ca49 100644 --- a/docs/CHANGING_AI_MODEL.md +++ b/docs/CHANGING_AI_MODEL.md @@ -15,6 +15,21 @@ OLLAMA_MODEL=phi3:latest ## ✅ How to Change the Model +### Important Note + +✅ **The model IS automatically checked and downloaded on startup** + +The `ollama-setup` service runs on every `docker-compose up` and: +- Checks if the model specified in `.env` exists +- Downloads it if missing +- Skips download if already present + +This means you can simply: +1. Change `OLLAMA_MODEL` in `.env` +2. Run `docker-compose up -d` +3. Wait for download (if needed) +4. Done! + ### Step 1: Update .env File Edit `backend/.env` and change the `OLLAMA_MODEL` value: @@ -30,22 +45,38 @@ OLLAMA_MODEL=mistral:7b OLLAMA_MODEL=your-custom-model:latest ``` -### Step 2: Restart Services - -The model will be automatically downloaded on startup: +### Step 2: Restart Services (Model Auto-Downloads) +**Option A: Simple restart (Recommended)** ```bash -# Stop services -docker-compose down - -# Start services (model will be pulled automatically) +# Restart all services docker-compose up -d -# Watch the download progress +# Watch the model check/download docker-compose logs -f ollama-setup ``` -**Note:** First startup with a new model takes 2-10 minutes depending on model size. 
+The `ollama-setup` service will: +- Check if the new model exists +- Download it if missing (2-10 minutes) +- Skip download if already present + +**Option B: Manual pull (if you want control)** +```bash +# Pull the model manually first +./pull-ollama-model.sh + +# Then restart +docker-compose restart crawler backend +``` + +**Option C: Full restart** +```bash +docker-compose down +docker-compose up -d +``` + +**Note:** Model download takes 2-10 minutes depending on model size. ## Supported Models @@ -264,3 +295,68 @@ A: 5-10GB for small models, 50GB+ for large models. Plan accordingly. - [OLLAMA_SETUP.md](OLLAMA_SETUP.md) - Ollama installation & configuration - [GPU_SETUP.md](GPU_SETUP.md) - GPU acceleration setup - [AI_NEWS_AGGREGATION.md](AI_NEWS_AGGREGATION.md) - AI features overview + + +## Complete Example: Changing from phi3 to llama3 + +```bash +# 1. Check current model +curl -s http://localhost:5001/api/ollama/models | python3 -m json.tool +# Shows: "current_model": "phi3:latest" + +# 2. Update .env file +# Edit backend/.env and change: +# OLLAMA_MODEL=llama3:8b + +# 3. Pull the new model +./pull-ollama-model.sh +# Or manually: docker-compose exec ollama ollama pull llama3:8b + +# 4. Restart services +docker-compose restart crawler backend + +# 5. Verify the change +curl -s http://localhost:5001/api/ollama/models | python3 -m json.tool +# Shows: "current_model": "llama3:8b" + +# 6. Test performance +curl -s http://localhost:5001/api/ollama/test | python3 -m json.tool +# Should show improved quality with llama3 +``` + +## Quick Reference + +### Change Model Workflow + +```bash +# 1. Edit .env +vim backend/.env # Change OLLAMA_MODEL + +# 2. Pull model +./pull-ollama-model.sh + +# 3. Restart +docker-compose restart crawler backend + +# 4. Verify +curl http://localhost:5001/api/ollama/test +``` + +### Common Commands + +```bash +# List downloaded models +docker-compose exec ollama ollama list + +# Pull a specific model +docker-compose exec ollama ollama pull mistral:7b + +# Remove a model +docker-compose exec ollama ollama rm phi3:latest + +# Check current config +curl http://localhost:5001/api/ollama/config + +# Test performance +curl http://localhost:5001/api/ollama/test +``` diff --git a/docs/CHECK_GPU_STATUS.md b/docs/CHECK_GPU_STATUS.md new file mode 100644 index 0000000..bb40e99 --- /dev/null +++ b/docs/CHECK_GPU_STATUS.md @@ -0,0 +1,276 @@ +# How to Check GPU Status via API + +## Quick Check + +### 1. GPU Status +```bash +curl http://localhost:5001/api/ollama/gpu-status | python3 -m json.tool +``` + +**Response:** +```json +{ + "status": "success", + "ollama_running": true, + "gpu_available": true, + "gpu_in_use": true, + "gpu_details": { + "model": "phi3:latest", + "gpu_layers": 32, + "size": 2300000000 + }, + "recommendation": "✓ GPU acceleration is active!" +} +``` + +### 2. Performance Test +```bash +curl http://localhost:5001/api/ollama/test | python3 -m json.tool +``` + +**Response:** +```json +{ + "status": "success", + "duration_seconds": 3.2, + "performance": "Excellent (GPU likely active)", + "model": "phi3:latest", + "recommendation": "Performance is good" +} +``` + +### 3. List Models +```bash +curl http://localhost:5001/api/ollama/models | python3 -m json.tool +``` + +## Using the Check Script + +We've created a convenient script: + +```bash +./check-gpu-api.sh +``` + +**Output:** +``` +========================================== +Ollama GPU Status Check +========================================== + +1. 
GPU Status: +--- +{ + "status": "success", + "gpu_in_use": true, + ... +} + +2. Performance Test: +--- +{ + "duration_seconds": 3.2, + "performance": "Excellent (GPU likely active)" +} + +3. Available Models: +--- +{ + "models": ["phi3:latest", "llama3:8b"] +} + +========================================== +Quick Summary: +========================================== +GPU Status: GPU Active +Performance: 3.2s - Excellent (GPU likely active) +``` + +## API Endpoints + +### GET /api/ollama/gpu-status +Check if GPU is being used by Ollama. + +**Response Fields:** +- `gpu_available` - GPU hardware detected +- `gpu_in_use` - Ollama actively using GPU +- `gpu_details` - GPU configuration details +- `recommendation` - Setup suggestions + +### GET /api/ollama/test +Test Ollama performance with a sample prompt. + +**Response Fields:** +- `duration_seconds` - Time taken for test +- `performance` - Performance rating +- `recommendation` - Performance suggestions + +### GET /api/ollama/models +List all available models. + +**Response Fields:** +- `models` - Array of model names +- `current_model` - Active model from .env + +### GET /api/ollama/ping +Test basic Ollama connectivity. + +### GET /api/ollama/config +View current Ollama configuration. + +## Interpreting Results + +### GPU Status + +**✅ GPU Active:** +```json +{ + "gpu_in_use": true, + "gpu_available": true +} +``` +- GPU acceleration is working +- Expect 5-10x faster processing + +**❌ CPU Mode:** +```json +{ + "gpu_in_use": false, + "gpu_available": false +} +``` +- Running on CPU only +- Slower processing (15-30s per article) + +### Performance Ratings + +| Duration | Rating | Mode | +|----------|--------|------| +| < 5s | Excellent | GPU likely active | +| 5-15s | Good | GPU may be active | +| 15-30s | Fair | CPU mode | +| > 30s | Slow | CPU mode, GPU recommended | + +## Troubleshooting + +### GPU Not Detected + +1. **Check if GPU compose is used:** + ```bash + docker-compose ps + # Should show GPU configuration + ``` + +2. **Verify NVIDIA runtime:** + ```bash + docker run --rm --gpus all nvidia/cuda:11.0-base nvidia-smi + ``` + +3. **Check Ollama logs:** + ```bash + docker-compose logs ollama | grep -i gpu + ``` + +### Slow Performance + +If performance test shows > 15s: + +1. **Enable GPU acceleration:** + ```bash + docker-compose down + docker-compose -f docker-compose.yml -f docker-compose.gpu.yml up -d + ``` + +2. **Verify GPU is available:** + ```bash + nvidia-smi + ``` + +3. **Check model size:** + - Larger models = slower + - Try `phi3:latest` for fastest performance + +### Connection Errors + +If API returns connection errors: + +1. **Check backend is running:** + ```bash + docker-compose ps backend + ``` + +2. **Check Ollama is running:** + ```bash + docker-compose ps ollama + ``` + +3. 
**Restart services:**
+   ```bash
+   docker-compose restart backend ollama
+   ```
+
+## Monitoring in Production
+
+### Automated Checks
+
+Add to your monitoring:
+
+```bash
+# Crontab entry: check GPU status every 5 minutes.
+# Note: crontab does not support backslash line continuation, so the
+# command must stay on one line.
+*/5 * * * * curl -s http://localhost:5001/api/ollama/gpu-status | python3 -c "import json,sys; sys.exit(0 if json.load(sys.stdin).get('gpu_in_use') else 1)"
+```
+
+### Performance Alerts
+
+Alert if performance degrades:
+
+```bash
+# Alert if response time > 20s
+DURATION=$(curl -s http://localhost:5001/api/ollama/test | \
+  python3 -c "import json,sys; print(json.load(sys.stdin).get('duration_seconds', 999))")
+
+if (( $(echo "$DURATION > 20" | bc -l) )); then
+  echo "ALERT: Ollama performance degraded: ${DURATION}s"
+fi
+```
+
+## Example: Full Health Check
+
+```bash
+#!/bin/bash
+# health-check.sh
+
+echo "Checking Ollama Health..."
+
+# 1. GPU Status
+GPU=$(curl -s http://localhost:5001/api/ollama/gpu-status | \
+  python3 -c "import json,sys; print('GPU' if json.load(sys.stdin).get('gpu_in_use') else 'CPU')")
+
+# 2. Performance (keep the value numeric so bc can compare it below)
+PERF=$(curl -s http://localhost:5001/api/ollama/test | \
+  python3 -c "import json,sys; print(json.load(sys.stdin).get('duration_seconds', 999))")
+
+# 3. Models
+MODELS=$(curl -s http://localhost:5001/api/ollama/models | \
+  python3 -c "import json,sys; print(len(json.load(sys.stdin).get('models', [])))")
+
+echo "Mode: $GPU"
+echo "Performance: ${PERF}s"
+echo "Models: $MODELS"
+
+# Exit with error if CPU mode and slow
+if [ "$GPU" = "CPU" ] && (( $(echo "$PERF > 20" | bc -l) )); then
+  echo "WARNING: Running in CPU mode with slow performance"
+  exit 1
+fi
+
+echo "✓ Health check passed"
+```
+
+## Related Documentation
+
+- [GPU_SETUP.md](GPU_SETUP.md) - GPU setup guide
+- [OLLAMA_SETUP.md](OLLAMA_SETUP.md) - Ollama configuration
+- [CHANGING_AI_MODEL.md](CHANGING_AI_MODEL.md) - Model switching guide
diff --git a/pull-ollama-model.sh b/pull-ollama-model.sh
new file mode 100755
index 0000000..292c1f3
--- /dev/null
+++ b/pull-ollama-model.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# Pull the Ollama model configured in backend/.env
+
+set -e
+
+# Load OLLAMA_MODEL from .env
+if [ -f backend/.env ]; then
+    export $(grep -v '^#' backend/.env | grep OLLAMA_MODEL | xargs)
+else
+    echo "Error: backend/.env file not found"
+    exit 1
+fi
+
+# Default to phi3:latest if not set
+MODEL=${OLLAMA_MODEL:-phi3:latest}
+
+echo "=========================================="
+echo "Pulling Ollama Model: $MODEL"
+echo "=========================================="
+echo ""
+
+# Check if Ollama container is running
+if ! docker-compose ps ollama | grep -q "Up"; then
+    echo "Error: Ollama container is not running"
+    echo "Start it with: docker-compose up -d ollama"
+    exit 1
+fi
+
+echo "Pulling model with the Ollama CLI inside the container..."
+echo ""
+
+# Pull the model (blocks until the download completes)
+docker-compose exec -T ollama ollama pull "$MODEL"
+
+echo ""
+echo "=========================================="
+echo "✓ Model $MODEL pulled successfully!"
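+# ('ollama pull' above runs synchronously and set -e is in effect, so
+# reaching this line means the download actually completed)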
+echo "==========================================" +echo "" +echo "Verify with:" +echo " docker-compose exec ollama ollama list" +echo "" +echo "Test with:" +echo " curl http://localhost:5001/api/ollama/test" diff --git a/scripts/setup-ollama-model.sh b/scripts/setup-ollama-model.sh new file mode 100755 index 0000000..78f93c2 --- /dev/null +++ b/scripts/setup-ollama-model.sh @@ -0,0 +1,57 @@ +#!/bin/sh +# Ollama Model Setup Script +# Checks if model exists and downloads if needed + +set -e + +MODEL="${OLLAMA_MODEL:-phi3:latest}" + +echo "========================================" +echo "Ollama Model Setup" +echo "Target model: $MODEL" +echo "========================================" +echo "" + +# Wait for Ollama to be ready +echo "Waiting for Ollama service..." +sleep 3 + +# Check if model exists +echo "Checking if model exists..." +MODELS=$(curl -s http://ollama:11434/api/tags 2>/dev/null || echo "") + +if [ -z "$MODELS" ]; then + echo "⚠ Warning: Could not connect to Ollama" + echo "Attempting to pull model anyway..." + curl -X POST http://ollama:11434/api/pull -d "{\"name\":\"$MODEL\"}" + echo "" + echo "✓ Model pull initiated: $MODEL" + exit 0 +fi + +# Check if our model is in the list +if echo "$MODELS" | grep -q "\"$MODEL\""; then + echo "✓ Model already exists: $MODEL" + echo "Skipping download." + echo "" + echo "Available models:" + echo "$MODELS" | grep -o '"name":"[^"]*"' | cut -d'"' -f4 | sed 's/^/ - /' +else + echo "⬇ Model not found, downloading: $MODEL" + echo "This may take 2-10 minutes depending on model size..." + echo "" + + # Pull the model + curl -X POST http://ollama:11434/api/pull -d "{\"name\":\"$MODEL\"}" + + echo "" + echo "✓ Model download initiated: $MODEL" + echo "" + echo "Monitor progress with:" + echo " docker-compose logs -f ollama" +fi + +echo "" +echo "========================================" +echo "Setup complete!" +echo "========================================"