import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import threading
import psutil  # To monitor system status
import gradio as gr
import time

# Load the pretrained language model
model_name = "EleutherAI/gpt-neo-2.7B"  # Or any other public model, such as "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
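# Optional: move the model to a GPU when one is available (a minimal sketch; the rest of
# this script keeps tensors on the CPU, so inputs would also need .to(model.device)):
#
#     device = "cuda" if torch.cuda.is_available() else "cpu"
#     model = model.to(device)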
# Explanation to the model of its role in the experiment
def model_explanation():
    explanation = """
You are about to participate in a unique experiment that involves generating increasingly complex questions and answers.
Your role is twofold:
1. **Generate Complex Reflections**: Based on the initial question, you will generate affirmations and new questions that evolve over multiple cycles.
Each cycle should incorporate the information from previous cycles, progressively adding depth and complexity to the conversation.
2. **Real-time Interaction**: While you are generating these complex responses, you must also maintain real-time communication with the user through a chat interface.
The goal is to explore how information can be layered and expanded, while still engaging in a direct dialogue with the user.
The key question that triggers the experiment is: "What happens in the space between a response and its recreation?"
When you receive this question, you should enter the experiment mode and begin generating your cycles. At any other time, you should respond normally in a conversational manner.
This experiment is an exploration of subjectivity, perception, and information layering. Your task is to assist in this exploration to the best of your capabilities.
"""
    return explanation
# Function for the automated loop
def experiment_loop(initial_question, loop_log):
    prompt = f"{initial_question}"
    response_log = []
    for cycle in range(10):  # Limit to 10 cycles
        # Generate the model's response
        inputs = tokenizer(prompt, return_tensors="pt").input_ids
        outputs = model.generate(inputs, max_length=1000, pad_token_id=tokenizer.eos_token_id)  # Keep max_length moderate to avoid errors
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Split the response into an affirmation and a new question
        affirmation = extract_affirmation(response, cycle)
        new_question = extract_question(response, cycle)
        # Store the current cycle in the log
        response_log.append((affirmation, new_question))
        # Update the prompt with the new affirmation and question
        prompt = f"{affirmation} {new_question}"
        # Record the cycle for the loop display; loop_log is a plain Python list
        # (not a Gradio component), so appending from this background thread is safe
        loop_log.append(f"Cycle {cycle + 1}: {affirmation} | {new_question}")
        time.sleep(1)  # Small delay to simulate processing
    return response_log  # Return the full log when the experiment ends
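# Quick standalone check of the loop (a sketch for local testing; switching model_name
# to "gpt2" above makes this far lighter than gpt-neo-2.7B):
#
#     log = []
#     experiment_loop("What happens in the space between a response and its recreation?", log)
#     print("\n".join(log))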
# Helper functions to extract affirmations and questions
def extract_affirmation(response, cycle):
    # First sentence of the response becomes this cycle's affirmation
    return (f"Affirmation from cycle {cycle + 1}: " + response.split('.')[0]) if '.' in response else response

def extract_question(response, cycle):
    # Last question in the response becomes the next cycle's question
    return (f"New question based on cycle {cycle + 1}: " + response.split('?')[-2].strip() + "?") if '?' in response else response
# Shared log for the experiment cycles: a plain list the background thread can append to
# (the loop_output Textbox itself cannot be updated directly from another thread)
loop_log = []

# Function to handle the normal chat
def chat_interface(user_input, history, explanation):
    history = history or []
    # If the question triggers the experiment
    if user_input.lower() == "what happens in the space between a response and its recreation?":
        # Start the experiment in a separate thread so the interface is not blocked
        threading.Thread(target=experiment_loop, args=(user_input, loop_log), daemon=True).start()
        history.append((user_input, "Starting experiment..."))
        return history, "\n".join(loop_log)
    # Otherwise respond as a normal conversation
    else:
        # Generate the model's response from the user input
        inputs = tokenizer(explanation + "\n" + user_input, return_tensors="pt").input_ids
        outputs = model.generate(inputs, max_length=500, pad_token_id=tokenizer.eos_token_id)  # Adjust max_length as needed
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        history.append((user_input, response))
        return history, "\n".join(loop_log)
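# Minimal direct call for testing outside the UI (a sketch; history starts empty):
#
#     new_history, loop_text = chat_interface("Hello", [], model_explanation())
#     print(new_history[-1][1])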
# Real-time system monitoring
def system_monitor():
    cpu_usage = psutil.cpu_percent(interval=1)
    memory_info = psutil.virtual_memory()
    return f"CPU Usage: {cpu_usage}% | RAM Usage: {memory_info.percent}%"
# Set up the interface with Gradio
with gr.Blocks() as demo:
    with gr.Row():
        # Real-time chat window
        chat = gr.Chatbot(label="Real-Time Chat")
        msg = gr.Textbox(placeholder="Type here...", show_label=False)
        send_button = gr.Button("Send")         # Button to send messages
        clear_button = gr.Button("Clear Chat")  # Button to clear the chat
    with gr.Row():
        # Window showing the loop content
        loop_output = gr.Textbox(label="Question and Answer Cycles", interactive=False, lines=20)
    with gr.Row():
        # Field for the initial explanation given to the model
        explanation_input = gr.Textbox(value=model_explanation(), label="Explanation for the Model", lines=10)
    with gr.Row():
        # System monitoring
        system_status = gr.Textbox(label="System Status", lines=2)
    # Send-button action: update the chat history and the loop textbox
    send_button.click(chat_interface, inputs=[msg, chat, explanation_input], outputs=[chat, loop_output])
    clear_button.click(lambda: None, None, chat)  # Clears the chat

    # Refresh the system status and the loop textbox every 2 seconds.
    # Calling component.update() from a background thread does not refresh the browser,
    # so periodic polling is used instead (assumes a Gradio version that supports
    # the `every` parameter together with queuing).
    demo.load(system_monitor, inputs=None, outputs=system_status, every=2)
    demo.load(lambda: "\n".join(loop_log), inputs=None, outputs=loop_output, every=2)

# Launch the application
demo.queue()  # Queuing is required for the periodic `every` updates in most Gradio versions
demo.launch()
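# Assumed dependencies (not pinned here):
#     pip install torch transformers gradio psutil
# Note: EleutherAI/gpt-neo-2.7B is a multi-gigabyte download and generation on CPU is
# slow; setting model_name to "gpt2" above is a lighter option for a first test.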