import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Model path - point this at your local model directory
MODEL_PATH = "./Qwen2.5-7B-Instruct"

# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)


# Chat function: `history` is a list of (user, assistant) message pairs
def chat(message, history):
    history = history or []

    # Rebuild the conversation in Qwen's ChatML format
    chat_history = ""
    for human, assistant in history:
        chat_history += f"<|im_start|>user\n{human}<|im_end|>\n"
        chat_history += f"<|im_start|>assistant\n{assistant}<|im_end|>\n"
    prompt = f"{chat_history}<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.1,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,  # avoid the missing-pad-token warning
    )

    # Decode only the newly generated tokens, dropping the prompt and special tokens
    response = tokenizer.decode(
        outputs[0][inputs.input_ids.shape[1]:],
        skip_special_tokens=True,
    )
    return response


# Build the chat UI
demo = gr.ChatInterface(
    chat,
    title="WeClone AI 助手",
    description="基于 Qwen2.5-7B 的聊天演示",
    theme="soft",
    examples=["你好", "介绍一下你自己", "你能做什么?"],
)

# Export as a deployable object
app = demo
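
# Optional: local testing entry point. A minimal sketch, not part of the
# original script - the `server_name`/`server_port` values here are
# assumptions; adjust them (or simply call `demo.launch()`) for your setup.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)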