update
@@ -124,6 +124,8 @@ That's it! The system will automatically:
 
 📖 **For detailed Ollama setup & GPU acceleration:** See [docs/OLLAMA_SETUP.md](docs/OLLAMA_SETUP.md)
 
+💡 **To change AI model:** Edit `OLLAMA_MODEL` in `.env`, then run `./pull-ollama-model.sh`. See [docs/CHANGING_AI_MODEL.md](docs/CHANGING_AI_MODEL.md)
+
 ## ⚙️ Configuration
 
 Edit `backend/.env`:

@@ -156,3 +156,163 @@ def get_ollama_models():
             'enabled': Config.OLLAMA_ENABLED
         }
     }), 500
+
+
+@ollama_bp.route('/api/ollama/gpu-status', methods=['GET'])
+def get_gpu_status():
+    """Check if Ollama is using GPU acceleration"""
+    import requests
+
+    try:
+        if not Config.OLLAMA_ENABLED:
+            return jsonify({
+                'status': 'disabled',
+                'message': 'Ollama is not enabled',
+                'gpu_available': False,
+                'gpu_in_use': False
+            }), 200
+
+        # Get Ollama process info
+        try:
+            response = requests.get(
+                f"{Config.OLLAMA_BASE_URL}/api/ps",
+                timeout=5
+            )
+
+            if response.status_code == 200:
+                ps_data = response.json()
+
+                # Check if any models are loaded
+                models_loaded = ps_data.get('models', [])
+
+                gpu_info = {
+                    'status': 'success',
+                    'ollama_running': True,
+                    'models_loaded': len(models_loaded),
+                    'gpu_available': False,
+                    'gpu_in_use': False,
+                    'gpu_details': None
+                }
+
+                # Check for GPU usage in loaded models
+                for model in models_loaded:
+                    if 'gpu' in str(model).lower() or model.get('gpu_layers', 0) > 0:
+                        gpu_info['gpu_in_use'] = True
+                        gpu_info['gpu_available'] = True
+                        gpu_info['gpu_details'] = {
+                            'model': model.get('name', 'unknown'),
+                            'gpu_layers': model.get('gpu_layers', 0),
+                            'size': model.get('size', 0)
+                        }
+                        break
+
+                # Try to get system info
+                try:
+                    tags_response = requests.get(
+                        f"{Config.OLLAMA_BASE_URL}/api/tags",
+                        timeout=5
+                    )
+                    if tags_response.status_code == 200:
+                        tags_data = tags_response.json()
+                        gpu_info['available_models'] = [m.get('name') for m in tags_data.get('models', [])]
+                except Exception:
+                    pass
+
+                # Add recommendation
+                if not gpu_info['gpu_in_use']:
+                    gpu_info['recommendation'] = (
+                        "GPU not detected. To enable GPU acceleration:\n"
+                        "1. Ensure NVIDIA GPU is available\n"
+                        "2. Install nvidia-docker2\n"
+                        "3. Use: docker-compose -f docker-compose.yml -f docker-compose.gpu.yml up -d\n"
+                        "4. See docs/GPU_SETUP.md for details"
+                    )
+                else:
+                    gpu_info['recommendation'] = "✓ GPU acceleration is active!"
+
+                return jsonify(gpu_info), 200
+            else:
+                return jsonify({
+                    'status': 'error',
+                    'message': f'Ollama API returned status {response.status_code}',
+                    'ollama_running': False,
+                    'gpu_available': False,
+                    'gpu_in_use': False
+                }), 500
+
+        except requests.exceptions.ConnectionError:
+            return jsonify({
+                'status': 'error',
+                'message': f'Cannot connect to Ollama at {Config.OLLAMA_BASE_URL}',
+                'ollama_running': False,
+                'gpu_available': False,
+                'gpu_in_use': False,
+                'troubleshooting': {
+                    'check_container': 'docker-compose ps ollama',
+                    'check_logs': 'docker-compose logs ollama',
+                    'restart': 'docker-compose restart ollama'
+                }
+            }), 500
+
+    except Exception as e:
+        return jsonify({
+            'status': 'error',
+            'message': f'Error checking GPU status: {str(e)}',
+            'gpu_available': False,
+            'gpu_in_use': False
+        }), 500
+
+
+@ollama_bp.route('/api/ollama/test', methods=['GET'])
+def test_ollama_performance():
+    """Test Ollama performance and measure response time"""
+    import time
+
+    try:
+        if not Config.OLLAMA_ENABLED:
+            return jsonify({
+                'status': 'disabled',
+                'message': 'Ollama is not enabled'
+            }), 200
+
+        # Test prompt
+        test_prompt = "Summarize this in 20 words: Munich is the capital of Bavaria, Germany. It is known for Oktoberfest, BMW, and beautiful architecture."
+
+        start_time = time.time()
+        response_text, error_message = call_ollama(test_prompt, "You are a helpful assistant.")
+        duration = time.time() - start_time
+
+        if response_text:
+            # Estimate performance from the wall-clock duration
+            if duration < 5:
+                performance = "Excellent (GPU likely active)"
+            elif duration < 15:
+                performance = "Good (GPU may be active)"
+            elif duration < 30:
+                performance = "Fair (CPU mode)"
+            else:
+                performance = "Slow (CPU mode, consider GPU)"
+
+            return jsonify({
+                'status': 'success',
+                'response': response_text,
+                'duration_seconds': round(duration, 2),
+                'performance': performance,
+                'model': Config.OLLAMA_MODEL,
+                'recommendation': (
+                    "GPU acceleration recommended" if duration > 15
+                    else "Performance is good"
+                )
+            }), 200
+        else:
+            return jsonify({
+                'status': 'error',
+                'message': error_message or 'Failed to get response',
+                'duration_seconds': round(duration, 2)
+            }), 500
+
+    except Exception as e:
+        return jsonify({
+            'status': 'error',
+            'message': f'Error testing Ollama: {str(e)}'
+        }), 500

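One caveat on the GPU detection loop above: the `gpu_layers` key is an assumption about the shape of Ollama's `/api/ps` payload. Recent Ollama builds report per-model VRAM usage as `size_vram`, so a stricter variant of the check (a sketch, not part of this commit) could look like:

```python
# Hedged alternative to the detection loop above. Assumption: the Ollama
# build in use reports per-model VRAM usage in /api/ps as "size_vram".
for model in models_loaded:
    if model.get('size_vram', 0) > 0:  # any VRAM allocated => model runs on the GPU
        gpu_info['gpu_in_use'] = True
        gpu_info['gpu_available'] = True
        break
```
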
check-gpu-api.sh (new executable file, 46 lines)
#!/bin/bash
# Check GPU status via API

echo "=========================================="
echo "Ollama GPU Status Check"
echo "=========================================="
echo ""

# Check GPU status
echo "1. GPU Status:"
echo "---"
curl -s http://localhost:5001/api/ollama/gpu-status | python3 -m json.tool
echo ""
echo ""

# Test performance
echo "2. Performance Test:"
echo "---"
curl -s http://localhost:5001/api/ollama/test | python3 -m json.tool
echo ""
echo ""

# List models
echo "3. Available Models:"
echo "---"
curl -s http://localhost:5001/api/ollama/models | python3 -m json.tool
echo ""
echo ""

echo "=========================================="
echo "Quick Summary:"
echo "=========================================="

# Extract key info
GPU_STATUS=$(curl -s http://localhost:5001/api/ollama/gpu-status | python3 -c "import json,sys; data=json.load(sys.stdin); print('GPU Active' if data.get('gpu_in_use') else 'CPU Mode')" 2>/dev/null || echo "Error")
PERF=$(curl -s http://localhost:5001/api/ollama/test | python3 -c "import json,sys; data=json.load(sys.stdin); print(f\"{data.get('duration_seconds', 'N/A')}s - {data.get('performance', 'N/A')}\")" 2>/dev/null || echo "Error")

echo "GPU Status: $GPU_STATUS"
echo "Performance: $PERF"
echo ""

if [ "$GPU_STATUS" = "CPU Mode" ]; then
    echo "💡 TIP: Enable GPU for 5-10x faster processing:"
    echo "   docker-compose -f docker-compose.yml -f docker-compose.gpu.yml up -d"
    echo "   See docs/GPU_SETUP.md for details"
fi

@@ -52,17 +52,10 @@ services:
       - munich-news-network
     env_file:
       - backend/.env
-    entrypoint: /bin/sh
-    command: >
-      -c "
-      echo 'Waiting for Ollama service to be ready...' &&
-      sleep 5 &&
-      echo 'Pulling model: ${OLLAMA_MODEL:-phi3:latest}' &&
-      curl -X POST http://ollama:11434/api/pull -d '{\"name\":\"${OLLAMA_MODEL:-phi3:latest}\"}' &&
-      echo '' &&
-      echo 'Model ${OLLAMA_MODEL:-phi3:latest} pull initiated!'
-      "
-    restart: "no"
+    volumes:
+      - ./scripts/setup-ollama-model.sh:/setup-ollama-model.sh:ro
+    command: sh /setup-ollama-model.sh
+    restart: on-failure
 
   # MongoDB Database (Internal only - not exposed to host)
   mongodb:

@@ -15,6 +15,21 @@ OLLAMA_MODEL=phi3:latest
 
 ## ✅ How to Change the Model
 
+### Important Note
+
+✅ **The model IS automatically checked and downloaded on startup**
+
+The `ollama-setup` service runs on every `docker-compose up` and:
+- Checks if the model specified in `.env` exists
+- Downloads it if missing
+- Skips download if already present
+
+This means you can simply:
+1. Change `OLLAMA_MODEL` in `.env`
+2. Run `docker-compose up -d`
+3. Wait for download (if needed)
+4. Done!
+
 ### Step 1: Update .env File
 
 Edit `backend/.env` and change the `OLLAMA_MODEL` value:

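The exists-or-pull check that `ollama-setup` performs can also be scripted outside the container; a minimal Python sketch, assuming the Ollama API is reachable from where you run it (inside the compose network it is `http://ollama:11434`):

```python
import requests

OLLAMA_URL = "http://ollama:11434"  # assumption: adjust to where the API is reachable
MODEL = "phi3:latest"

# Ask Ollama which models are present, then pull only if ours is missing
tags = requests.get(f"{OLLAMA_URL}/api/tags", timeout=5).json()
if not any(m.get("name") == MODEL for m in tags.get("models", [])):
    # "stream": False makes the pull block until the download completes
    requests.post(f"{OLLAMA_URL}/api/pull",
                  json={"name": MODEL, "stream": False}, timeout=None)
```
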
@@ -30,22 +45,38 @@ OLLAMA_MODEL=mistral:7b
 OLLAMA_MODEL=your-custom-model:latest
 ```
 
-### Step 2: Restart Services
+### Step 2: Restart Services (Model Auto-Downloads)
 
-The model will be automatically downloaded on startup:
-
+**Option A: Simple restart (Recommended)**
 ```bash
-# Stop services
-docker-compose down
-
-# Start services (model will be pulled automatically)
+# Restart all services
 docker-compose up -d
 
-# Watch the download progress
+# Watch the model check/download
 docker-compose logs -f ollama-setup
 ```
 
-**Note:** First startup with a new model takes 2-10 minutes depending on model size.
+The `ollama-setup` service will:
+- Check if the new model exists
+- Download it if missing (2-10 minutes)
+- Skip download if already present
+
+**Option B: Manual pull (if you want control)**
+```bash
+# Pull the model manually first
+./pull-ollama-model.sh
+
+# Then restart
+docker-compose restart crawler backend
+```
+
+**Option C: Full restart**
+```bash
+docker-compose down
+docker-compose up -d
+```
+
+**Note:** Model download takes 2-10 minutes depending on model size.
 
 ## Supported Models
 

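To watch a pull programmatically rather than tailing `docker-compose logs`, a small sketch against Ollama's streaming `/api/pull` endpoint (assuming, again, that the API is reachable from where you run it; the model name is illustrative):

```python
import json
import requests

OLLAMA_URL = "http://ollama:11434"  # assumption: adjust to where the API is reachable

# Ollama streams one JSON status object per line while a pull is running
with requests.post(f"{OLLAMA_URL}/api/pull",
                   json={"name": "mistral:7b"},
                   stream=True, timeout=None) as resp:
    for line in resp.iter_lines():
        if line:
            status = json.loads(line)
            print(status.get("status"), status.get("completed"), status.get("total"))
```
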
@@ -264,3 +295,68 @@ A: 5-10GB for small models, 50GB+ for large models. Plan accordingly.
 - [OLLAMA_SETUP.md](OLLAMA_SETUP.md) - Ollama installation & configuration
 - [GPU_SETUP.md](GPU_SETUP.md) - GPU acceleration setup
 - [AI_NEWS_AGGREGATION.md](AI_NEWS_AGGREGATION.md) - AI features overview
+
+## Complete Example: Changing from phi3 to llama3
+
+```bash
+# 1. Check current model
+curl -s http://localhost:5001/api/ollama/models | python3 -m json.tool
+# Shows: "current_model": "phi3:latest"
+
+# 2. Update .env file
+# Edit backend/.env and change:
+# OLLAMA_MODEL=llama3:8b
+
+# 3. Pull the new model
+./pull-ollama-model.sh
+# Or manually: docker-compose exec ollama ollama pull llama3:8b
+
+# 4. Restart services
+docker-compose restart crawler backend
+
+# 5. Verify the change
+curl -s http://localhost:5001/api/ollama/models | python3 -m json.tool
+# Shows: "current_model": "llama3:8b"
+
+# 6. Test performance
+curl -s http://localhost:5001/api/ollama/test | python3 -m json.tool
+# Should show improved quality with llama3
+```
+
+## Quick Reference
+
+### Change Model Workflow
+
+```bash
+# 1. Edit .env
+vim backend/.env  # Change OLLAMA_MODEL
+
+# 2. Pull model
+./pull-ollama-model.sh
+
+# 3. Restart
+docker-compose restart crawler backend
+
+# 4. Verify
+curl http://localhost:5001/api/ollama/test
+```
+
+### Common Commands
+
+```bash
+# List downloaded models
+docker-compose exec ollama ollama list
+
+# Pull a specific model
+docker-compose exec ollama ollama pull mistral:7b
+
+# Remove a model
+docker-compose exec ollama ollama rm phi3:latest
+
+# Check current config
+curl http://localhost:5001/api/ollama/config
+
+# Test performance
+curl http://localhost:5001/api/ollama/test
+```

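For scripted verification that a model switch took effect (as in step 5 of the complete example above), a small Python sketch; the port and the `current_model` field follow the examples in this guide:

```python
import requests

resp = requests.get("http://localhost:5001/api/ollama/models", timeout=10)
data = resp.json()

expected = "llama3:8b"  # the model you just set in backend/.env
assert data.get("current_model") == expected, data
print("Active model verified:", data["current_model"])
```
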
docs/CHECK_GPU_STATUS.md (new file, 276 lines)
# How to Check GPU Status via API

## Quick Check

### 1. GPU Status

```bash
curl http://localhost:5001/api/ollama/gpu-status | python3 -m json.tool
```

**Response:**
```json
{
    "status": "success",
    "ollama_running": true,
    "gpu_available": true,
    "gpu_in_use": true,
    "gpu_details": {
        "model": "phi3:latest",
        "gpu_layers": 32,
        "size": 2300000000
    },
    "recommendation": "✓ GPU acceleration is active!"
}
```

### 2. Performance Test

```bash
curl http://localhost:5001/api/ollama/test | python3 -m json.tool
```

**Response:**
```json
{
    "status": "success",
    "duration_seconds": 3.2,
    "performance": "Excellent (GPU likely active)",
    "model": "phi3:latest",
    "recommendation": "Performance is good"
}
```

### 3. List Models

```bash
curl http://localhost:5001/api/ollama/models | python3 -m json.tool
```

## Using the Check Script

We've created a convenient script:

```bash
./check-gpu-api.sh
```

**Output:**
```
==========================================
Ollama GPU Status Check
==========================================

1. GPU Status:
---
{
    "status": "success",
    "gpu_in_use": true,
    ...
}

2. Performance Test:
---
{
    "duration_seconds": 3.2,
    "performance": "Excellent (GPU likely active)"
}

3. Available Models:
---
{
    "models": ["phi3:latest", "llama3:8b"]
}

==========================================
Quick Summary:
==========================================
GPU Status: GPU Active
Performance: 3.2s - Excellent (GPU likely active)
```

## API Endpoints

### GET /api/ollama/gpu-status

Check whether Ollama is using the GPU.

**Response Fields:**
- `gpu_available` - GPU hardware detected
- `gpu_in_use` - Ollama is actively using the GPU
- `gpu_details` - GPU configuration details
- `recommendation` - Setup suggestions

### GET /api/ollama/test

Test Ollama performance with a sample prompt.

**Response Fields:**
- `duration_seconds` - Time taken for the test
- `performance` - Performance rating
- `recommendation` - Performance suggestions

### GET /api/ollama/models

List all available models.

**Response Fields:**
- `models` - Array of model names
- `current_model` - Active model from `.env`

### GET /api/ollama/ping

Test basic Ollama connectivity.

### GET /api/ollama/config

View the current Ollama configuration.

## Interpreting Results

### GPU Status

**✅ GPU Active:**
```json
{
    "gpu_in_use": true,
    "gpu_available": true
}
```
- GPU acceleration is working
- Expect 5-10x faster processing

**❌ CPU Mode:**
```json
{
    "gpu_in_use": false,
    "gpu_available": false
}
```
- Running on CPU only
- Slower processing (15-30s per article)

### Performance Ratings

| Duration | Rating    | Mode                      |
|----------|-----------|---------------------------|
| < 5s     | Excellent | GPU likely active         |
| 5-15s    | Good      | GPU may be active         |
| 15-30s   | Fair      | CPU mode                  |
| > 30s    | Slow      | CPU mode, GPU recommended |

## Troubleshooting

### GPU Not Detected

1. **Check that the GPU compose file is in use:**
   ```bash
   docker-compose ps
   # Should show the GPU configuration
   ```

2. **Verify the NVIDIA runtime:**
   ```bash
   docker run --rm --gpus all nvidia/cuda:11.0-base nvidia-smi
   ```

3. **Check the Ollama logs:**
   ```bash
   docker-compose logs ollama | grep -i gpu
   ```

### Slow Performance

If the performance test shows > 15s:

1. **Enable GPU acceleration:**
   ```bash
   docker-compose down
   docker-compose -f docker-compose.yml -f docker-compose.gpu.yml up -d
   ```

2. **Verify the GPU is available:**
   ```bash
   nvidia-smi
   ```

3. **Check the model size:**
   - Larger models = slower
   - Try `phi3:latest` for the fastest performance

### Connection Errors

If the API returns connection errors:

1. **Check that the backend is running:**
   ```bash
   docker-compose ps backend
   ```

2. **Check that Ollama is running:**
   ```bash
   docker-compose ps ollama
   ```

3. **Restart the services:**
   ```bash
   docker-compose restart backend ollama
   ```

## Monitoring in Production

### Automated Checks

Add to your monitoring (crontab syntax):

```bash
# Check GPU status every 5 minutes; non-zero exit when on CPU
*/5 * * * * curl -s http://localhost:5001/api/ollama/gpu-status | \
  python3 -c "import json,sys; data=json.load(sys.stdin); \
  sys.exit(0 if data.get('gpu_in_use') else 1)"
```

### Performance Alerts

Alert if performance degrades:

```bash
# Alert if response time > 20s
DURATION=$(curl -s http://localhost:5001/api/ollama/test | \
  python3 -c "import json,sys; print(json.load(sys.stdin).get('duration_seconds', 999))")

if (( $(echo "$DURATION > 20" | bc -l) )); then
    echo "ALERT: Ollama performance degraded: ${DURATION}s"
fi
```
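The same two checks combined in one Python sketch, for setups where cron plus `bc` is awkward; it assumes the backend port mapping used throughout this guide:

```python
import sys
import requests

BASE = "http://localhost:5001"  # assumption: backend mapped as in the examples above

gpu = requests.get(f"{BASE}/api/ollama/gpu-status", timeout=10).json()
perf = requests.get(f"{BASE}/api/ollama/test", timeout=120).json()
duration = perf.get("duration_seconds", 999)

print(f"gpu_in_use={gpu.get('gpu_in_use')} duration={duration}s")

# Fail the check when running on CPU *and* responses are slow
if not gpu.get("gpu_in_use") and duration > 20:
    sys.exit(1)
```
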
## Example: Full Health Check

```bash
#!/bin/bash
# health-check.sh

echo "Checking Ollama Health..."

# 1. GPU status
GPU=$(curl -s http://localhost:5001/api/ollama/gpu-status | \
  python3 -c "import json,sys; print('GPU' if json.load(sys.stdin).get('gpu_in_use') else 'CPU')")

# 2. Performance (bare number, so it can be compared below)
PERF=$(curl -s http://localhost:5001/api/ollama/test | \
  python3 -c "import json,sys; print(json.load(sys.stdin).get('duration_seconds', 999))")

# 3. Models
MODELS=$(curl -s http://localhost:5001/api/ollama/models | \
  python3 -c "import json,sys; print(len(json.load(sys.stdin).get('models', [])))")

echo "Mode: $GPU"
echo "Performance: ${PERF}s"
echo "Models: $MODELS"

# Exit with an error if in CPU mode and slow
if [ "$GPU" = "CPU" ] && (( $(echo "$PERF > 20" | bc -l) )); then
    echo "WARNING: Running in CPU mode with slow performance"
    exit 1
fi

echo "✓ Health check passed"
```

## Related Documentation

- [GPU_SETUP.md](GPU_SETUP.md) - GPU setup guide
- [OLLAMA_SETUP.md](OLLAMA_SETUP.md) - Ollama configuration
- [CHANGING_AI_MODEL.md](CHANGING_AI_MODEL.md) - Model switching guide

pull-ollama-model.sh (new executable file, 44 lines)
#!/bin/bash
# Pull the Ollama model named in the .env file

set -e

# Load OLLAMA_MODEL from .env (simple KEY=VALUE parsing; values with
# spaces would need proper quoting)
if [ -f backend/.env ]; then
    export $(grep -v '^#' backend/.env | grep OLLAMA_MODEL | xargs)
else
    echo "Error: backend/.env file not found"
    exit 1
fi

# Default to phi3:latest if not set
MODEL=${OLLAMA_MODEL:-phi3:latest}

echo "=========================================="
echo "Pulling Ollama Model: $MODEL"
echo "=========================================="
echo ""

# Check that the Ollama container is running
if ! docker-compose ps ollama | grep -q "Up"; then
    echo "Error: Ollama container is not running"
    echo "Start it with: docker-compose up -d ollama"
    exit 1
fi

echo "Pulling model via the Ollama CLI in the container..."
echo ""

# Pull the model
docker-compose exec -T ollama ollama pull "$MODEL"

echo ""
echo "=========================================="
echo "✓ Model $MODEL pulled successfully!"
echo "=========================================="
echo ""
echo "Verify with:"
echo "  docker-compose exec ollama ollama list"
echo ""
echo "Test with:"
echo "  curl http://localhost:5001/api/ollama/test"

scripts/setup-ollama-model.sh (new executable file, 57 lines)
#!/bin/sh
# Ollama Model Setup Script
# Checks if the model exists and downloads it if needed

set -e

MODEL="${OLLAMA_MODEL:-phi3:latest}"

echo "========================================"
echo "Ollama Model Setup"
echo "Target model: $MODEL"
echo "========================================"
echo ""

# Wait for Ollama to be ready
echo "Waiting for Ollama service..."
sleep 3

# Check if the model exists
echo "Checking if model exists..."
MODELS=$(curl -s http://ollama:11434/api/tags 2>/dev/null || echo "")

if [ -z "$MODELS" ]; then
    echo "⚠ Warning: Could not connect to Ollama"
    echo "Attempting to pull model anyway..."
    curl -X POST http://ollama:11434/api/pull -d "{\"name\":\"$MODEL\"}"
    echo ""
    echo "✓ Model pull initiated: $MODEL"
    exit 0
fi

# Check if our model is in the list
if echo "$MODELS" | grep -q "\"$MODEL\""; then
    echo "✓ Model already exists: $MODEL"
    echo "Skipping download."
    echo ""
    echo "Available models:"
    echo "$MODELS" | grep -o '"name":"[^"]*"' | cut -d'"' -f4 | sed 's/^/  - /'
else
    echo "⬇ Model not found, downloading: $MODEL"
    echo "This may take 2-10 minutes depending on model size..."
    echo ""

    # Pull the model
    curl -X POST http://ollama:11434/api/pull -d "{\"name\":\"$MODEL\"}"

    echo ""
    echo "✓ Model download initiated: $MODEL"
    echo ""
    echo "Monitor progress with:"
    echo "  docker-compose logs -f ollama"
fi

echo ""
echo "========================================"
echo "Setup complete!"
echo "========================================"