import difflib
import json
import tempfile
from pathlib import Path
from typing import List, Tuple, Optional, Dict, Any

import gradio as gr

from config import settings
from ollama_utils import (
    ensure_ollama_running,
    is_model_available,
    ask_ollama_stream,
    pull_model_with_progress,
)
from file_processing import read_uploaded_files, guess_lang_from_content

ChatMessage = Dict[str, str]
ChatHistory = List[ChatMessage]
LegacyHistory = List[Tuple[Optional[str], Optional[str]]]


def _normalize_history(history: Optional[Any]) -> ChatHistory:
    """Ensure the history is a list of role/content message dicts."""
    if not history:
        return []
    normalized: ChatHistory = []
    for item in history:
        if isinstance(item, dict) and "role" in item and "content" in item:
            normalized.append(item)
        elif isinstance(item, (list, tuple)) and len(item) == 2:
            user, bot = item
            if user:
                normalized.append({"role": "user", "content": str(user)})
            if bot:
                normalized.append({"role": "assistant", "content": str(bot)})
    return normalized


def _messages_to_pairs(messages: ChatHistory) -> LegacyHistory:
    """Convert sequential messages into (user, assistant) tuples."""
    pairs: LegacyHistory = []
    pending_user: Optional[str] = None
    for msg in messages:
        role = msg.get("role")
        content = msg.get("content", "")
        if role == "user":
            if pending_user is not None:
                pairs.append((pending_user, None))
            pending_user = content
        elif role == "assistant":
            if pending_user is not None:
                pairs.append((pending_user, content))
                pending_user = None
            else:
                pairs.append((None, content))
    if pending_user is not None:
        pairs.append((pending_user, None))
    return pairs


def _init_state():
    """Initialize an empty application state."""
    return {
        "history": [],
        "last_files": None,
        "downloaded_models": [],
    }


def build_prompt(user_text: str, files_blob: str, language: str) -> str:
    """Build the prompt sent to the model."""
    parts = []
    if files_blob.strip():
        lang_detected = guess_lang_from_content(files_blob) or language
        parts.append(f"Basado en el siguiente contexto y archivos adjuntos (lenguaje: {lang_detected}):")
        parts.append(files_blob)
        parts.append("\n---")
    parts.append("Responde a la siguiente instrucción del usuario:")
    parts.append(user_text)
    return "\n\n".join(parts)


def main_chat(
    app_state: Dict[str, Any],
    history: LegacyHistory,  # Gradio passes the history as (user, assistant) pairs
    user_text: str,
    model: str,
    files,
):
    if not app_state:
        app_state = _init_state()
    downloaded_models: List[str] = app_state.setdefault("downloaded_models", [])
    history_messages = _normalize_history(history)

    if not user_text.strip() and not files:
        yield app_state, history, user_text, files
        return

    # Check Ollama status
    if not ensure_ollama_running():
        gr.Warning("Ollama no está en ejecución. Por favor, inicia el servicio de Ollama.")
        yield app_state, history, user_text, files
        return

    # Add user message to history so the user sees it immediately
    history_messages.append({"role": "user", "content": user_text})

    model_ready = model in downloaded_models or is_model_available(model)
    if not model_ready:
        gr.Info(f"El modelo '{model}' no está disponible localmente. Intentando descargarlo...")
        download_msg = {"role": "assistant", "content": f"📥 Descargando modelo '{model}'..."}
        history_messages.append(download_msg)
        yield app_state, _messages_to_pairs(history_messages), "", files

        pull_success = False
        for status in pull_model_with_progress(model):
            download_msg["content"] = status
            yield app_state, _messages_to_pairs(history_messages), "", files
            if status.startswith("✅"):
                pull_success = True

        if not pull_success:
            gr.Error(f"No se pudo descargar el modelo '{model}'. Por favor, verifica el nombre o hazlo manualmente.")
            return

        if model not in downloaded_models:
            downloaded_models.append(model)
        gr.Info(f"Modelo '{model}' descargado con éxito.")

    # Prepare inputs
    files_blob, preview, _ = read_uploaded_files(files, "")
    user_prompt = build_prompt(user_text, files_blob, "Python")  # Default language, can be improved
    system_prompt = (
        "Eres un asistente de IA servicial, experto en desarrollo de software y una amplia gama de temas. "
        "Responde siempre en español, de forma clara y concisa. "
        "Si se te pide código, formatéalo en bloques de markdown con la etiqueta del lenguaje correspondiente."
    )

    # Stream response
    assistant_message = {"role": "assistant", "content": ""}
    history_messages.append(assistant_message)

    # The history sent to the model should not include the latest empty assistant message
    model_history_pairs = _messages_to_pairs(history_messages[:-1])

    full_response = ""
    for chunk in ask_ollama_stream(
        model=model,
        system_prompt=system_prompt,
        history=model_history_pairs,
        new_prompt=user_prompt,
        temperature=0.4,  # Sensible default
        top_p=0.9,  # Sensible default
        max_tokens=4096,  # Sensible default
    ):
        full_response += chunk
        assistant_message["content"] = full_response
        yield app_state, _messages_to_pairs(history_messages), "", files

    # Truncate history if too long
    if len(history_messages) > settings.MAX_CHAT_TURNS * 2:
        history_messages = history_messages[-(settings.MAX_CHAT_TURNS * 2):]

    app_state["history"] = history_messages
    yield app_state, _messages_to_pairs(history_messages), "", files
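

# --- Illustrative usage sketch (an editor-added example, not part of the app's Gradio flow) ---
# A minimal, self-contained check of the two history-shape helpers above, assuming the
# module's imports (config, ollama_utils, file_processing, gradio) resolve in your environment.
# It only exercises the pure conversion functions and never contacts Ollama.
if __name__ == "__main__":
    legacy: LegacyHistory = [
        ("hola", "¡Hola! ¿En qué puedo ayudarte?"),
        ("dame un ejemplo", None),  # user turn still awaiting a reply
    ]
    msgs = _normalize_history(legacy)
    # Each tuple becomes role/content messages; the missing assistant reply is skipped.
    assert msgs == [
        {"role": "user", "content": "hola"},
        {"role": "assistant", "content": "¡Hola! ¿En qué puedo ayudarte?"},
        {"role": "user", "content": "dame un ejemplo"},
    ]
    # Converting back pairs each user turn with its reply, using None while still pending.
    assert _messages_to_pairs(msgs) == legacy
    print("history helpers round-trip OK")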