from flask import Blueprint, jsonify
from config import Config
from services.ollama_service import call_ollama, list_ollama_models
import os

ollama_bp = Blueprint('ollama', __name__)


@ollama_bp.route('/api/ollama/ping', methods=['GET', 'POST'])
def ping_ollama():
    """Test connection to Ollama server"""
    try:
        # Check if Ollama is enabled
        if not Config.OLLAMA_ENABLED:
            return jsonify({
                'status': 'disabled',
                'message': 'Ollama is not enabled. Set OLLAMA_ENABLED=true in your .env file.',
                'ollama_config': {
                    'base_url': Config.OLLAMA_BASE_URL,
                    'model': Config.OLLAMA_MODEL,
                    'enabled': False
                }
            }), 200

        # Send a simple test prompt
        test_prompt = "Say 'Hello! I am connected and working.' in one sentence."
        system_prompt = "You are a helpful assistant. Respond briefly and concisely."

        response_text, error_message = call_ollama(test_prompt, system_prompt)

        if response_text:
            return jsonify({
                'status': 'success',
                'message': 'Successfully connected to Ollama',
                'response': response_text,
                'ollama_config': {
                    'base_url': Config.OLLAMA_BASE_URL,
                    'model': Config.OLLAMA_MODEL,
                    'enabled': True
                }
            }), 200
        else:
            # Try to get available models for a better error message
            available_models, _ = list_ollama_models()

            troubleshooting = {
                'check_server': f'Verify Ollama is running at {Config.OLLAMA_BASE_URL}',
                'check_model': f'Verify model "{Config.OLLAMA_MODEL}" is available (run: ollama list)',
                'test_connection': f'Test manually: curl {Config.OLLAMA_BASE_URL}/api/generate -d \'{{"model":"{Config.OLLAMA_MODEL}","prompt":"test"}}\''
            }

            if available_models:
                troubleshooting['available_models'] = available_models
                troubleshooting['suggestion'] = f'Try setting OLLAMA_MODEL to one of: {", ".join(available_models[:5])}'

            return jsonify({
                'status': 'error',
                'message': error_message or 'Failed to get response from Ollama',
                'error_details': error_message,
                'ollama_config': {
                    'base_url': Config.OLLAMA_BASE_URL,
                    'model': Config.OLLAMA_MODEL,
                    'enabled': True
                },
                'troubleshooting': troubleshooting
            }), 500

    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': f'Error connecting to Ollama: {str(e)}',
            'ollama_config': {
                'base_url': Config.OLLAMA_BASE_URL,
                'model': Config.OLLAMA_MODEL,
                'enabled': Config.OLLAMA_ENABLED
            }
        }), 500


@ollama_bp.route('/api/ollama/config', methods=['GET'])
def get_ollama_config():
    """Get current Ollama configuration (for debugging)"""
    try:
        from pathlib import Path
        backend_dir = Path(__file__).parent.parent
        env_path = backend_dir / '.env'

        return jsonify({
            'ollama_config': {
                'base_url': Config.OLLAMA_BASE_URL,
                'model': Config.OLLAMA_MODEL,
                'enabled': Config.OLLAMA_ENABLED,
                'has_api_key': bool(Config.OLLAMA_API_KEY)
            },
            'env_file_path': str(env_path),
            'env_file_exists': env_path.exists(),
            'current_working_directory': os.getcwd()
        }), 200
    except Exception as e:
        return jsonify({
            'error': str(e),
            'ollama_config': {
                'base_url': Config.OLLAMA_BASE_URL,
                'model': Config.OLLAMA_MODEL,
                'enabled': Config.OLLAMA_ENABLED
            }
        }), 500


@ollama_bp.route('/api/ollama/models', methods=['GET'])
def get_ollama_models():
    """List available models on Ollama server"""
    try:
        if not Config.OLLAMA_ENABLED:
            return jsonify({
                'status': 'disabled',
                'message': 'Ollama is not enabled. Set OLLAMA_ENABLED=true in your .env file.',
                'ollama_config': {
                    'base_url': Config.OLLAMA_BASE_URL,
                    'model': Config.OLLAMA_MODEL,
                    'enabled': False
                }
            }), 200

        models, error_message = list_ollama_models()

        if models is not None:
            return jsonify({
                'status': 'success',
                'models': models,
                'current_model': Config.OLLAMA_MODEL,
                'ollama_config': {
                    'base_url': Config.OLLAMA_BASE_URL,
                    'model': Config.OLLAMA_MODEL,
                    'enabled': True
                }
            }), 200
        else:
            return jsonify({
                'status': 'error',
                'message': error_message or 'Failed to list models',
                'ollama_config': {
                    'base_url': Config.OLLAMA_BASE_URL,
                    'model': Config.OLLAMA_MODEL,
                    'enabled': True
                }
            }), 500

    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': f'Error listing models: {str(e)}',
            'ollama_config': {
                'base_url': Config.OLLAMA_BASE_URL,
                'model': Config.OLLAMA_MODEL,
                'enabled': Config.OLLAMA_ENABLED
            }
        }), 500


@ollama_bp.route('/api/ollama/gpu-status', methods=['GET'])
def get_gpu_status():
    """Check if Ollama is using GPU acceleration"""
    import requests

    try:
        if not Config.OLLAMA_ENABLED:
            return jsonify({
                'status': 'disabled',
                'message': 'Ollama is not enabled',
                'gpu_available': False,
                'gpu_in_use': False
            }), 200

        # Get Ollama process info
        try:
            response = requests.get(
                f"{Config.OLLAMA_BASE_URL}/api/ps",
                timeout=5
            )

            if response.status_code == 200:
                ps_data = response.json()

                # Check if any models are loaded
                models_loaded = ps_data.get('models', [])

                gpu_info = {
                    'status': 'success',
                    'ollama_running': True,
                    'models_loaded': len(models_loaded),
                    'gpu_available': False,
                    'gpu_in_use': False,
                    'gpu_details': None
                }

                # Check for GPU usage in loaded models
                for model in models_loaded:
                    # Check various GPU indicators
                    gpu_layers = model.get('gpu_layers', 0)
                    details = model.get('details', {})

                    # Check if GPU is mentioned in any field
                    if (gpu_layers > 0 or
                            'gpu' in str(model).lower() or
                            'cuda' in str(model).lower() or
                            details.get('families', []) and 'gpu' in str(details.get('families', [])).lower()):
                        gpu_info['gpu_in_use'] = True
                        gpu_info['gpu_available'] = True
                        gpu_info['gpu_details'] = {
                            'model': model.get('name', 'unknown'),
                            'gpu_layers': gpu_layers,
                            'size': model.get('size', 0),
                            'size_vram': model.get('size_vram', 0)
                        }
                        break

                # If no models loaded, check Docker container for GPU
                if not gpu_info['gpu_in_use']:
                    try:
                        import subprocess
                        # Check if nvidia-smi works in the ollama container
                        result = subprocess.run(
                            ['docker', 'exec', 'munich-news-ollama', 'nvidia-smi', '--query-gpu=name', '--format=csv,noheader'],
                            capture_output=True,
                            text=True,
                            timeout=5
                        )
                        if result.returncode == 0 and result.stdout.strip():
                            gpu_info['gpu_available'] = True
                            gpu_info['gpu_details'] = {
                                'gpu_name': result.stdout.strip(),
                                'note': 'GPU available but no model currently loaded'
                            }
                    except Exception:
                        pass

                # Try to get system info
                try:
                    tags_response = requests.get(
                        f"{Config.OLLAMA_BASE_URL}/api/tags",
                        timeout=5
                    )
                    if tags_response.status_code == 200:
                        tags_data = tags_response.json()
                        gpu_info['available_models'] = [m.get('name') for m in tags_data.get('models', [])]
                except Exception:
                    pass

                # Add recommendation
                if not gpu_info['gpu_in_use']:
                    gpu_info['recommendation'] = (
                        "GPU not detected. To enable GPU acceleration:\n"
                        "1. Ensure NVIDIA GPU is available\n"
                        "2. Install nvidia-docker2\n"
                        "3. Use: docker-compose -f docker-compose.yml -f docker-compose.gpu.yml up -d\n"
                        "4. See docs/GPU_SETUP.md for details"
                    )
                else:
                    gpu_info['recommendation'] = "✓ GPU acceleration is active!"

                return jsonify(gpu_info), 200
            else:
                return jsonify({
                    'status': 'error',
                    'message': f'Ollama API returned status {response.status_code}',
                    'ollama_running': False,
                    'gpu_available': False,
                    'gpu_in_use': False
                }), 500

        except requests.exceptions.ConnectionError:
            return jsonify({
                'status': 'error',
                'message': f'Cannot connect to Ollama at {Config.OLLAMA_BASE_URL}',
                'ollama_running': False,
                'gpu_available': False,
                'gpu_in_use': False,
                'troubleshooting': {
                    'check_container': 'docker-compose ps ollama',
                    'check_logs': 'docker-compose logs ollama',
                    'restart': 'docker-compose restart ollama'
                }
            }), 500

    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': f'Error checking GPU status: {str(e)}',
            'gpu_available': False,
            'gpu_in_use': False
        }), 500


@ollama_bp.route('/api/ollama/test', methods=['GET'])
def test_ollama_performance():
    """Test Ollama performance and measure response time"""
    import time

    try:
        if not Config.OLLAMA_ENABLED:
            return jsonify({
                'status': 'disabled',
                'message': 'Ollama is not enabled'
            }), 200

        # Test prompt
        test_prompt = "Summarize this in 20 words: Munich is the capital of Bavaria, Germany. It is known for Oktoberfest, BMW, and beautiful architecture."

        start_time = time.time()
        response_text, error_message = call_ollama(test_prompt, "You are a helpful assistant.")
        duration = time.time() - start_time

        if response_text:
            # Estimate performance
            if duration < 5:
                performance = "Excellent (GPU likely active)"
            elif duration < 15:
                performance = "Good (GPU may be active)"
            elif duration < 30:
                performance = "Fair (CPU mode)"
            else:
                performance = "Slow (CPU mode, consider GPU)"

            return jsonify({
                'status': 'success',
                'response': response_text,
                'duration_seconds': round(duration, 2),
                'performance': performance,
                'model': Config.OLLAMA_MODEL,
                'recommendation': (
                    "GPU acceleration recommended" if duration > 15
                    else "Performance is good"
                )
            }), 200
        else:
            return jsonify({
                'status': 'error',
                'message': error_message or 'Failed to get response',
                'duration_seconds': round(duration, 2)
            }), 500

    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': f'Error testing Ollama: {str(e)}'
        }), 500
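

# Minimal local smoke test: running this module directly mounts the blueprint
# on a throwaway Flask app and hits /api/ollama/ping with the test client.
# This is a sketch, not the project's production entry point; it assumes only
# what this file already imports (Flask, Config, the ollama service helpers)
# plus that the module can be executed from the backend directory so that
# `config` and `services.ollama_service` resolve.
if __name__ == '__main__':
    from flask import Flask

    _app = Flask(__name__)
    _app.register_blueprint(ollama_bp)

    with _app.test_client() as client:
        # GET works because the ping route accepts both GET and POST
        resp = client.get('/api/ollama/ping')
        print(resp.status_code, resp.get_json())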