import json
import hashlib
import os
import platform
import shutil
import subprocess
import tarfile
import tempfile
import time
from collections import OrderedDict
from pathlib import Path
from typing import Dict, Optional, List, Tuple, Generator, Any

import requests

from config import settings


class LRUCache:
    """Cache LRU simple y thread-safe para respuestas de Ollama."""
    def __init__(self, max_size: int = 128):
        from threading import Lock
        self.max_size = max_size
        self._lock = Lock()
        self._data: "OrderedDict[str, str]" = OrderedDict()

    def get(self, key: str) -> Optional[str]:
        with self._lock:
            if key in self._data:
                self._data.move_to_end(key)
                return self._data[key]
            return None

    def set(self, key: str, value: str) -> None:
        with self._lock:
            self._data[key] = value
            self._data.move_to_end(key)
            if len(self._data) > self.max_size:
                self._data.popitem(last=False)


response_cache = LRUCache(settings.CACHE_MAX_ITEMS)

EMBEDDED_OLLAMA_DIR = Path.home() / ".local" / "ollama-lite"
EMBEDDED_BIN = EMBEDDED_OLLAMA_DIR / "bin" / "ollama"
EMBEDDED_LIB_DIR = EMBEDDED_OLLAMA_DIR / "lib" / "ollama"
_OLLAMA_BIN_CACHE: Optional[str] = None


def _ollama_url() -> str:
    return settings.OLLAMA_URL


def _arch_slug() -> Optional[str]:
    machine = platform.machine().lower()
    if machine in ("x86_64", "amd64"):
        return "amd64"
    if machine in ("arm64", "aarch64"):
        return "arm64"
    return None


def _ensure_embedded_ollama() -> Optional[str]:
    """
    Descarga un binario portátil de Ollama si no existe y devuelve la ruta al ejecutable.
    """
    global _OLLAMA_BIN_CACHE
    if _OLLAMA_BIN_CACHE:
        return _OLLAMA_BIN_CACHE

    existing = shutil.which("ollama")
    if existing:
        _OLLAMA_BIN_CACHE = existing
        return existing

    if EMBEDDED_BIN.exists():
        EMBEDDED_BIN.chmod(0o755)
        _OLLAMA_BIN_CACHE = str(EMBEDDED_BIN)
        os.environ.setdefault("PATH", "")
        os.environ["PATH"] = f"{EMBEDDED_BIN.parent}:{os.environ['PATH']}"
        _inject_ld_library_path()
        return _OLLAMA_BIN_CACHE

    arch = _arch_slug()
    if not arch:
        return None

    EMBEDDED_OLLAMA_DIR.mkdir(parents=True, exist_ok=True)
    bundle_url = f"https://github.com/ollama/ollama/releases/latest/download/ollama-linux-{arch}.tgz"
    tmp_fd, tmp_path = tempfile.mkstemp(prefix="ollama_bundle_", suffix=".tgz")
    os.close(tmp_fd)
    try:
        print(f"📥 Descargando Ollama portátil ({arch})...")
        with requests.get(bundle_url, stream=True, timeout=(30, 120)) as resp:
            resp.raise_for_status()
            with open(tmp_path, "wb") as bundle:
                for chunk in resp.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        bundle.write(chunk)

        print("📦 Extrayendo Ollama portátil...")
        with tarfile.open(tmp_path, mode="r:gz") as tar:
            members = [
                m
                for m in tar.getmembers()
                if m.name.startswith("bin/") or m.name.startswith("lib/")
            ]
            tar.extractall(path=EMBEDDED_OLLAMA_DIR, members=members)

        # Elimina librerías CUDA para ahorrar espacio en entornos CPU
        if EMBEDDED_LIB_DIR.exists():
            for cuda_dir in EMBEDDED_LIB_DIR.glob("cuda_*"):
                shutil.rmtree(cuda_dir, ignore_errors=True)

        EMBEDDED_BIN.chmod(0o755)
        _OLLAMA_BIN_CACHE = str(EMBEDDED_BIN)
        os.environ["PATH"] = f"{EMBEDDED_BIN.parent}:{os.environ.get('PATH', '')}"
        _inject_ld_library_path()
        return _OLLAMA_BIN_CACHE
    except Exception as exc:
        print(f"❌ No se pudo instalar Ollama portátil: {exc}")
        return None
    finally:
        try:
            os.remove(tmp_path)
        except OSError:
            pass


def _inject_ld_library_path():
    current = os.environ.get("LD_LIBRARY_PATH", "")
    lib_path = str(EMBEDDED_LIB_DIR)
    if lib_path not in current.split(":"):
        prefix = f"{lib_path}:" if current else lib_path
        os.environ["LD_LIBRARY_PATH"] = f"{prefix}{current}"


def _ollama_command() -> Optional[str]:
    cmd = _ensure_embedded_ollama()
    return cmd


def verify() -> str:
    try:
        r = requests.get(f"{_ollama_url()}/api/version", timeout=2)
        r.raise_for_status()
        v = r.json().get("version", "?")
        return f"✅ Ollama en ejecución (v{v})"
    except requests.exceptions.RequestException:
        return "❌ Ollama no responde. Inicia Ollama primero."
    except Exception as e:
        return f"❌ Error al verificar Ollama: {e}"


def ensure_ollama_running() -> bool:
    try:
        r = requests.get(f"{_ollama_url()}/api/version", timeout=2)
        return r.ok
    except requests.exceptions.RequestException:
        return False
    except Exception:
        return False


def start_ollama() -> str:
    """Intenta iniciar Ollama usando subprocess y polling (hasta 5s)."""
    if ensure_ollama_running():
        return "✅ Ollama ya está en ejecución."

    ollama_cmd = _ollama_command()
    if not ollama_cmd:
        return "❌ No se encontró el binario de Ollama y no se pudo descargar automáticamente."

    # Intento con systemctl (común en Linux)
    try:
        subprocess.run(
            ["systemctl", "--user", "start", "ollama"],
            capture_output=True, check=False, text=True
        )
        time.sleep(2)
        if ensure_ollama_running():
            return verify()
    except FileNotFoundError:
        pass  # systemctl no disponible

    # Fallback a 'ollama serve' en segundo plano
    try:
        env = os.environ.copy()
        _inject_ld_library_path()
        env["LD_LIBRARY_PATH"] = os.environ.get("LD_LIBRARY_PATH", "")
        subprocess.Popen(
            [ollama_cmd, "serve"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            env=env,
        )
        for _ in range(60):
            if ensure_ollama_running():
                return verify()
            time.sleep(1)
    except FileNotFoundError:
        return "❌ No se encontró el binario de Ollama. Instálalo primero."
    except Exception as e:
        return f"❌ Error al intentar 'ollama serve': {e}"

    return "❌ No se pudo iniciar Ollama. Por favor, inícialo manualmente."


def list_models() -> List[str]:
    """Lista modelos potentes de HF y los locales del usuario."""
    # Lista curada de modelos potentes y recomendados
    hf_suggestions = [
        "hf.co/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated-GGUF",
        "hf.co/bartowski/Qwen2.5-Coder-7B-Instruct-GGUF:Q4_K_M",
        "hf.co/meta-llama/Meta-Llama-3-8B-Instruct-GGUF",
        "hf.co/arcee-ai/SuperNova-Medius-GGUF",
        "hf.co/bartowski/Llama-3.2-1B-Instruct-GGUF",
        "llama3.1:8b", # Modelo local común
        "llama3:8b",
    ]
    
    local_models = []
    try:
        r = requests.get(f"{_ollama_url()}/api/tags", timeout=3)
        if r.ok:
            models_data = r.json().get("models", [])
            local_models = [m.get("model") for m in models_data if m.get("model")]
    except requests.exceptions.RequestException:
        # Ollama puede no estar corriendo, devolvemos solo las sugerencias
        pass
    
    # Combinar y eliminar duplicados, manteniendo el orden
    combined_models = []
    seen = set()
    
    for model in hf_suggestions + local_models:
        if model not in seen:
            combined_models.append(model)
            seen.add(model)
            
    return combined_models


def _is_hf_model(model_name: str) -> bool:
    """Detecta si es un modelo de Hugging Face Hub"""
    return model_name.startswith("hf.co/") or model_name.startswith("huggingface.co/")


def _sanitize_model_name(name: str) -> Optional[str]:
    """Valida nombres de modelos incluyendo formato HF"""
    import re
    # Permitir formato hf.co/user/repo y hf.co/user/repo:quant
    if _is_hf_model(name):
        pattern = r"^(hf\.co|huggingface\.co)/[\w.-]+/[\w.-]+(:[A-Za-z0-9_.-]+)?$"
        if re.match(pattern, name):
            return name
        return None
    
    # Validación normal para modelos locales
    if re.fullmatch(r"[A-Za-z0-9:._/-]+", name):
        return name
    return None


def is_model_available(name: str) -> bool:
    if not ensure_ollama_running():
        return False
    try:
        r = requests.get(f"{_ollama_url()}/api/tags", timeout=5)
        r.raise_for_status()
        models = [m.get("model") for m in r.json().get("models", []) if m.get("model")]
        return name in models
    except requests.exceptions.RequestException:
        return False
    except Exception:
        return False


def pull_model_with_progress(model_name: str):
    if not ensure_ollama_running():
        yield "❌ Ollama no está corriendo. Inícialo primero."
        return
    safe = _sanitize_model_name(model_name)
    if not safe:
        yield "⚠️ Nombre de modelo inválido."
        return
    
    if _is_hf_model(safe):
        yield f"📦 Descargando modelo GGUF desde Hugging Face: {safe}"
    
    try:
        r = requests.post(
            f"{_ollama_url()}/api/pull",
            json={"name": safe},
            stream=True,
            timeout=1800,  # 30 minutos de timeout para modelos grandes
        )
        r.raise_for_status()
        for line in r.iter_lines():
            if not line:
                continue
            try:
                data = json.loads(line.decode("utf-8"))
                status = data.get("status", "")
                if "total" in data and "completed" in data:
                    total = data["total"]
                    completed = data["completed"]
                    if total > 0:
                        pct = int(completed / total * 100)
                        yield f"📥 {status}: {pct}%"
                else:
                    yield f"📥 {status}"
            except json.JSONDecodeError:
                continue
        yield f"✅ Modelo {safe} descargado correctamente"
    except requests.exceptions.RequestException as e:
        yield f"⚠️ Error de red al descargar: {e}"
    except Exception as e:
        yield f"⚠️ Error inesperado: {e}"


def ask_ollama_stream(
    model: str,
    system_prompt: str,
    history: List[Tuple[Optional[str], Optional[str]]],
    new_prompt: str,
    temperature: float,
    top_p: float,
    max_tokens: int,
) -> Generator[str, None, None]:
    """Hace un chat streaming a Ollama, usando contexto multi-turno y caché LRU."""
    messages: List[Dict[str, str]] = [{"role": "system", "content": system_prompt}]
    ctx_n = settings.CONTEXT_HISTORY_TURNS
    for user_msg, bot_msg in history[-ctx_n:]:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": new_prompt})

    payload: Dict[str, Any] = {
        "model": model,
        "messages": messages,
        "options": {
            "temperature": float(temperature),
            "top_p": float(top_p),
            "num_predict": int(max_tokens),
        },
        "stream": True,
    }

    cache_key = hashlib.md5(json.dumps(payload, sort_keys=True).encode()).hexdigest()
    cached = response_cache.get(cache_key)
    if settings.CACHE_RESPONSES and cached:
        yield cached
        return

    accumulated = ""
    try:
        with requests.post(
            f"{_ollama_url()}/api/chat", json=payload, stream=True, timeout=300
        ) as r:
            r.raise_for_status()
            for line in r.iter_lines():
                if not line:
                    continue
                try:
                    data = json.loads(line.decode("utf-8"))
                    content = data.get("message", {}).get("content", "")
                    if content:
                        accumulated += content
                        yield content
                except json.JSONDecodeError:
                    continue
        if settings.CACHE_RESPONSES and accumulated:
            response_cache.set(cache_key, accumulated)
    except requests.exceptions.RequestException as e:
        yield f"\n\n⚠️ Error de red: {e}"
    except Exception as e:
        yield f"\n\n⚠️ Error de conexión: {e}"