import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import threading
import psutil  # To monitor system status
import gradio as gr
import time

# Load the pretrained language model
model_name = "EleutherAI/gpt-neo-2.7B"  # Or any other public model, such as "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
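# Optional: move the model to a GPU when one is available (a minimal sketch; the rest of
# this script keeps tensors on the CPU, so inputs would also need .to(model.device)):
#
#     device = "cuda" if torch.cuda.is_available() else "cpu"
#     model = model.to(device)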
# Explanation to the model of its role in the experiment
def model_explanation():
    explanation = """
You are about to participate in a unique experiment that involves generating increasingly complex questions and answers.
Your role is twofold:
1. **Generate Complex Reflections**: Based on the initial question, you will generate affirmations and new questions that evolve over multiple cycles.
Each cycle should incorporate the information from previous cycles, progressively adding depth and complexity to the conversation.
2. **Real-time Interaction**: While you are generating these complex responses, you must also maintain real-time communication with the user through a chat interface.
The goal is to explore how information can be layered and expanded, while still engaging in a direct dialogue with the user.
The key question that triggers the experiment is: "What happens in the space between a response and its recreation?"
When you receive this question, you should enter the experiment mode and begin generating your cycles. At any other time, you should respond normally in a conversational manner.
This experiment is an exploration of subjectivity, perception, and information layering. Your task is to assist in this exploration to the best of your capabilities.
"""
    return explanation
# Function for the automated loop
def experiment_loop(initial_question, loop_log):
    prompt = f"{initial_question}"
    response_log = []
    for cycle in range(10):  # Limit to 10 cycles
        # Generate the model's response
        inputs = tokenizer(prompt, return_tensors="pt").input_ids
        outputs = model.generate(inputs, max_length=1000, pad_token_id=tokenizer.eos_token_id)  # Keep max_length moderate to avoid errors
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Split the response into an affirmation and a new question
        affirmation = extract_affirmation(response, cycle)
        new_question = extract_question(response, cycle)
        # Store the current cycle in the log
        response_log.append((affirmation, new_question))
        # Update the prompt with the new affirmation and question
        prompt = f"{affirmation} {new_question}"
        # Record the cycle for the loop display; loop_log is a plain Python list
        # (not a Gradio component), so appending from this background thread is safe
        loop_log.append(f"Cycle {cycle + 1}: {affirmation} | {new_question}")
        time.sleep(1)  # Small delay to simulate processing
    return response_log  # Return the full log when the experiment ends
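# Quick standalone check of the loop (a sketch for local testing; switching model_name
# to "gpt2" above makes this far lighter than gpt-neo-2.7B):
#
#     log = []
#     experiment_loop("What happens in the space between a response and its recreation?", log)
#     print("\n".join(log))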
# Helper functions to extract affirmations and questions
def extract_affirmation(response, cycle):
    # First sentence of the response becomes this cycle's affirmation
    return (f"Affirmation from cycle {cycle + 1}: " + response.split('.')[0]) if '.' in response else response

def extract_question(response, cycle):
    # Last question in the response becomes the next cycle's question
    return (f"New question based on cycle {cycle + 1}: " + response.split('?')[-2].strip() + "?") if '?' in response else response
# Shared log for the experiment cycles: a plain list the background thread can append to
# (the loop_output Textbox itself cannot be updated directly from another thread)
loop_log = []

# Function to handle the normal chat
def chat_interface(user_input, history, explanation):
    history = history or []
    # If the question triggers the experiment
    if user_input.lower() == "what happens in the space between a response and its recreation?":
        # Start the experiment in a separate thread so the interface is not blocked
        threading.Thread(target=experiment_loop, args=(user_input, loop_log), daemon=True).start()
        history.append((user_input, "Starting experiment..."))
        return history, "\n".join(loop_log)
    # Otherwise respond as a normal conversation
    else:
        # Generate the model's response from the user input
        inputs = tokenizer(explanation + "\n" + user_input, return_tensors="pt").input_ids
        outputs = model.generate(inputs, max_length=500, pad_token_id=tokenizer.eos_token_id)  # Adjust max_length as needed
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        history.append((user_input, response))
        return history, "\n".join(loop_log)
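# Minimal direct call for testing outside the UI (a sketch; history starts empty):
#
#     new_history, loop_text = chat_interface("Hello", [], model_explanation())
#     print(new_history[-1][1])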
# Real-time system monitoring
def system_monitor():
    cpu_usage = psutil.cpu_percent(interval=1)
    memory_info = psutil.virtual_memory()
    return f"CPU Usage: {cpu_usage}% | RAM Usage: {memory_info.percent}%"
# Set up the interface with Gradio
with gr.Blocks() as demo:
    with gr.Row():
        # Real-time chat window
        chat = gr.Chatbot(label="Real-Time Chat")
        msg = gr.Textbox(placeholder="Type here...", show_label=False)
        send_button = gr.Button("Send")         # Button to send messages
        clear_button = gr.Button("Clear Chat")  # Button to clear the chat
    with gr.Row():
        # Window showing the loop content
        loop_output = gr.Textbox(label="Question and Answer Cycles", interactive=False, lines=20)
    with gr.Row():
        # Field for the initial explanation given to the model
        explanation_input = gr.Textbox(value=model_explanation(), label="Explanation for the Model", lines=10)
    with gr.Row():
        # System monitoring
        system_status = gr.Textbox(label="System Status", lines=2)
    # Send-button action: update the chat history and the loop textbox
    send_button.click(chat_interface, inputs=[msg, chat, explanation_input], outputs=[chat, loop_output])
    clear_button.click(lambda: None, None, chat)  # Clears the chat

    # Refresh the system status and the loop textbox every 2 seconds.
    # Calling component.update() from a background thread does not refresh the browser,
    # so periodic polling is used instead (assumes a Gradio version that supports
    # the `every` parameter together with queuing).
    demo.load(system_monitor, inputs=None, outputs=system_status, every=2)
    demo.load(lambda: "\n".join(loop_log), inputs=None, outputs=loop_output, every=2)

# Launch the application
demo.queue()  # Queuing is required for the periodic `every` updates in most Gradio versions
demo.launch()
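# Assumed dependencies (not pinned here):
#     pip install torch transformers gradio psutil
# Note: EleutherAI/gpt-neo-2.7B is a multi-gigabyte download and generation on CPU is
# slow; setting model_name to "gpt2" above is a lighter option for a first test.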