import os

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Quantized GGUF chat model hosted on the Hugging Face Hub.
REPO_ID = "Xm3Ga/Nemotron-Research-Reasoning-Qwen-1.5B-Q5_K_M-GGUF"
FNAME = "nemotron-research-reasoning-qwen-1.5b-q5_k_m.gguf"
MODEL_DIR = "models"
MODEL_PATH = os.path.join(MODEL_DIR, FNAME)


def ensure_model() -> str:
    """Download the GGUF model file if it is not cached locally.

    Returns:
        The local filesystem path to the model file (MODEL_PATH).
    """
    os.makedirs(MODEL_DIR, exist_ok=True)
    if not os.path.exists(MODEL_PATH):
        print(f"Downloading {REPO_ID}/{FNAME} …")
        # NOTE: `force_filename` was removed from hf_hub_download in
        # recent huggingface_hub releases; `local_dir` already saves the
        # file under its repo filename, which equals FNAME.
        hf_hub_download(repo_id=REPO_ID, filename=FNAME, local_dir=MODEL_DIR)
    return MODEL_PATH


model_path = ensure_model()
# CPU inference: 2048-token context window, 4 threads.
llm = Llama(model_path=model_path, n_ctx=2048, n_threads=4, verbose=False)

# Fixed system prompt: defines the assistant's identity (information-security
# assistant) and requires professional, accurate, concise answers in Chinese.
# The string itself is user-facing model input and must stay in Chinese.
SYSTEM_PROMPT = (
    "你是一名专业的信息安全助手,"
    "请始终使用中文回答所有问题,"
    "确保回答专业、准确、简洁。"
)


def chat_fn(user_message, history):
    """Generate one assistant reply and append the turn to the history.

    Args:
        user_message: The latest user input text.
        history: List of (user, assistant) tuples from prior turns, or
            None/empty on the first turn.

    Returns:
        A (history, "") tuple: the updated history for the Chatbot
        component, and an empty string to clear the input textbox.
    """
    history = history or []
    # Build the prompt as a flat transcript: system prompt, prior turns,
    # then the new user message with an open assistant slot.
    lines = [SYSTEM_PROMPT]
    for user_turn, assistant_turn in history:
        lines.append(f"用户: {user_turn}")
        lines.append(f"助手: {assistant_turn}")
    lines.append(f"用户: {user_message}")
    prompt = "\n".join(lines) + "\n助手:"

    resp = llm(prompt, max_tokens=256, temperature=0.7, top_p=0.9)
    text = resp["choices"][0]["text"].strip()

    history.append((user_message, text))
    return history, ""


with gr.Blocks(css=".gradio-container { max-width: 700px; margin: auto; }") as demo:
    gr.Markdown("## 信息安全助手")
    chatbot = gr.Chatbot(elem_id="chatbot")
    user_input = gr.Textbox(placeholder="输入消息,按 Enter 发送", lines=1)
    # Submitting the textbox sends (message, history) in and receives the
    # updated history plus a cleared textbox back.
    user_input.submit(chat_fn, [user_input, chatbot], [chatbot, user_input])

demo.launch(ssr_mode=False)