import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
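
# Local path to the Qwen2.5-7B-Instruct checkpoint, downloaded ahead of time.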
MODEL_PATH = "./Qwen2.5-7B-Instruct"
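
# Load the tokenizer and model once at startup: bfloat16 roughly halves
# memory use versus fp32, and device_map="auto" lets Accelerate place the
# weights on whatever GPUs (or CPU) are available.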
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)
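
# Chat callback for gr.ChatInterface: receives the newest user message and
# the conversation history, and returns the assistant's reply as plain text.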
def chat(message, history):
    history = history or []
    chat_history = ""
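    # Re-serialize earlier turns into Qwen's ChatML format: each turn is
    # wrapped in <|im_start|>{role} ... <|im_end|> markers.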
    for human, assistant in history:
        chat_history += f"<|im_start|>user\n{human}<|im_end|>\n"
        chat_history += f"<|im_start|>assistant\n{assistant}<|im_end|>\n"
    prompt = f"{chat_history}<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
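    # Note: tokenizer.apply_chat_template(messages, tokenize=False,
    # add_generation_prompt=True) would build a comparable ChatML prompt and
    # tracks template changes automatically; the manual construction above
    # is kept for transparency.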
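    # Tokenize onto the model's device, then sample a reply: temperature and
    # top_p control randomness, repetition_penalty discourages loops.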
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.1,
        eos_token_id=tokenizer.eos_token_id,
    )
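    # Decode only the newly generated tokens, slicing the prompt off the
    # front of the output sequence.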
    response = tokenizer.decode(
        outputs[0][inputs.input_ids.shape[1]:],
        skip_special_tokens=True,
    )
    return response
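
# Expose the chat function through Gradio's built-in chat UI. The tuple
# unpacking in chat() assumes the pair-style history that older Gradio
# versions pass by default; newer releases may default to the "messages"
# format, in which case set type="tuples" or adapt the handler.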
demo = gr.ChatInterface(
    chat,
    title="WeClone AI Assistant",
    description="A chat demo built on Qwen2.5-7B",
    theme="soft",
    examples=["Hello", "Tell me about yourself", "What can you do?"],
)
app = demo
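
# Start the web UI when the script is executed directly.
if __name__ == "__main__":
    demo.launch()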