import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Model path - point this at your local model directory
MODEL_PATH = "./Qwen2.5-7B-Instruct"

# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)


# Chat function: `history` is a list of (user, assistant) message pairs
def chat(message, history):
    history = history or []

    # Rebuild the conversation in Qwen's ChatML format
    chat_history = ""
    for human, assistant in history:
        chat_history += f"<|im_start|>user\n{human}<|im_end|>\n"
        chat_history += f"<|im_start|>assistant\n{assistant}<|im_end|>\n"
    prompt = f"{chat_history}<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.1,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,  # avoid the missing-pad-token warning
    )

    # Decode only the newly generated tokens, dropping the prompt and special tokens
    response = tokenizer.decode(
        outputs[0][inputs.input_ids.shape[1]:],
        skip_special_tokens=True,
    )
    return response


# Build the chat UI
demo = gr.ChatInterface(
    chat,
    title="WeClone AI 助手",
    description="基于 Qwen2.5-7B 的聊天演示",
    theme="soft",
    examples=["你好", "介绍一下你自己", "你能做什么?"],
)

# Export as a deployable object
app = demo
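
# Optional: local testing entry point. A minimal sketch, not part of the
# original script - the `server_name`/`server_port` values here are
# assumptions; adjust them (or simply call `demo.launch()`) for your setup.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)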