import os

import gradio as gr
from huggingface_hub import InferenceClient

# Shared inference client for the Mistral instruct model.
# The access token is read from the HF_TOKEN environment variable; os.getenv
# returns None when the variable is unset. Export HF_TOKEN before launching
# instead of hard-coding a token in this file.
client = InferenceClient(
    model="mistralai/Mistral-7B-Instruct-v0.3",
    token=os.getenv("HF_TOKEN"),
)

# Chat handler: builds the prompt from the conversation and streams the reply
def _montar_contexto(mensagem, historico):
    """Build the instruct-style prompt from past turns plus the new message.

    Two history shapes are accepted:
    * "messages" style: dicts carrying "role" and "content" keys (any other
      keys are ignored);
    * pair style: ``(user_msg, bot_msg)`` two-item sequences.

    Args:
        mensagem: The new user message.
        historico: Previous turns in either shape, or ``None``/empty.

    Returns:
        The prompt string, ending with the open ``[INST] ... [/INST]`` turn
        for ``mensagem``.
    """
    contexto = ""
    pergunta_pendente = None  # last user message still waiting for its reply

    for item in historico or []:
        if isinstance(item, dict):
            conteudo = item.get("content")
            if not isinstance(conteudo, str):
                # Non-text content cannot be placed in a text prompt.
                continue
            papel = item.get("role")
            if papel == "user":
                pergunta_pendente = conteudo
            elif papel == "assistant" and pergunta_pendente is not None:
                contexto += f"<s>[INST] {pergunta_pendente} [/INST] {conteudo} </s>\n"
                pergunta_pendente = None
        else:
            user_msg, bot_msg = item
            contexto += f"<s>[INST] {user_msg} [/INST] {bot_msg} </s>\n"

    contexto += f"<s>[INST] {mensagem} [/INST]"
    return contexto


def responder(mensagem, historico):
    """Stream the assistant's reply to ``mensagem``.

    This is a generator: each yielded value is the full reply accumulated so
    far, so the caller can replace the displayed text on every step.

    Args:
        mensagem: The new user message.
        historico: Previous turns, as "messages"-style dicts or
            ``(user_msg, bot_msg)`` pairs; ``None`` is treated as empty.

    Yields:
        The partial reply after each streamed chunk. If generation fails, a
        single error message is yielded instead; if the model produces only
        whitespace, a "no response" message is yielded.
    """
    contexto = _montar_contexto(mensagem, historico)
    resposta = ""

    try:
        # stream=True makes text_generation return an iterator of text chunks.
        for token in client.text_generation(
            contexto,
            stream=True,
            max_new_tokens=300,
            temperature=0.4,
            top_p=0.8,
            stop=["</s>"],
        ):
            resposta += token
            yield resposta
    except Exception as e:
        print(f"Erro ao gerar resposta: {e}")
        yield "Ocorreu um erro ao gerar a resposta."
        # Stop here so the empty-response message below does not overwrite
        # the error message.
        return

    if not resposta.strip():
        yield "Nenhuma resposta gerada. Tente novamente."

# Chat UI; the user-facing title and description are in Portuguese
_TITULO = "Benjamin – Assistente Virtual da CEaD - IBC"
_DESCRICAO = "Tire dúvidas com minha inteligência artificial (minha base de dados vai até 2021)"

# retry_btn, undo_btn and clear_btn are intentionally not passed
demo = gr.ChatInterface(
    responder,
    type="messages",
    title=_TITULO,
    description=_DESCRICAO,
)

# Start the app only when this file is run as a script
if __name__ == "__main__":
    demo.launch()