ranson committed on
Commit
a861f12
·
verified ·
1 Parent(s): 8dbf2ca

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -0
app.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from llama_cpp import Llama
3
+
4
+ llm = Llama(model_path="qwen14b-ggml-Q2_K.gguf")
5
+
def format_prompt(message, history):
    """Flatten a chat history plus the new message into one prompt string.

    Each (user, bot) exchange is concatenated verbatim — no separator or
    role tags are inserted — and the new message is appended last.
    """
    pieces = []
    for user_turn, bot_turn in history:
        pieces.append(f"{user_turn}{bot_turn}")
    pieces.append(f"{message}")
    return "".join(pieces)
def generate(
    prompt, history, system_prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
):
    """Stream a completion from the model, yielding the growing output text.

    Parameters
    ----------
    prompt : str
        The user's new message.
    history : list[tuple[str, str]]
        Prior (user, bot) exchanges, flattened by ``format_prompt``.
    system_prompt : str
        Prepended to the prompt (joined with ", ").
    temperature, max_new_tokens, top_p, repetition_penalty
        Sampling controls, fed by the sliders in ``additional_inputs``.

    Yields
    ------
    str
        The accumulated output so far (gradio streaming convention).
    """
    temperature = float(temperature)
    if temperature < 1e-2:
        # Clamp to a small positive value instead of sampling at exactly 0.
        temperature = 1e-2
    top_p = float(top_p)
    repetition_penalty = float(repetition_penalty)

    formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)

    # BUG FIX: the original hard-coded max_tokens=4096, silently ignoring the
    # max_new_tokens argument (and the "Max new tokens" slider wired to it).
    stream = llm(
        formatted_prompt,
        stream=True,
        temperature=temperature,
        repeat_penalty=repetition_penalty,
        top_p=top_p,
        max_tokens=int(max_new_tokens),
    )

    output = ""
    for response in stream:
        output += response['choices'][0]['text']
        yield output
36
+
# Extra UI controls shown alongside the chat box. Their order must match the
# extra parameters of `generate`: system_prompt, temperature, max_new_tokens,
# top_p, repetition_penalty.
_system_prompt_box = gr.Textbox(
    label="System Prompt",
    max_lines=1,
    interactive=True,
)
_temperature_slider = gr.Slider(
    label="Temperature",
    value=0.9,
    minimum=0.0,
    maximum=1.0,
    step=0.05,
    interactive=True,
    info="Higher values produce more diverse outputs",
)
_max_tokens_slider = gr.Slider(
    label="Max new tokens",
    value=256,
    minimum=0,
    maximum=1048,
    step=64,
    interactive=True,
    info="The maximum numbers of new tokens",
)
_top_p_slider = gr.Slider(
    label="Top-p (nucleus sampling)",
    value=0.90,
    minimum=0.0,
    maximum=1,
    step=0.05,
    interactive=True,
    info="Higher values sample more low-probability tokens",
)
_repetition_penalty_slider = gr.Slider(
    label="Repetition penalty",
    value=1.2,
    minimum=1.0,
    maximum=2.0,
    step=0.05,
    interactive=True,
    info="Penalize repeated tokens",
)

additional_inputs = [
    _system_prompt_box,
    _temperature_slider,
    _max_tokens_slider,
    _top_p_slider,
    _repetition_penalty_slider,
]
# Sample questions rendered under the chat box. Each row is the prompt text
# followed by placeholders (None) for the five additional inputs.
_example_prompts = [
    "你是谁?",
    "介绍下阿里巴巴。",
    "I'm trying to learn French. Can you provide some common phrases that would be useful for a beginner, along with their pronunciations?",
    "I have chicken, rice, and bell peppers in my kitchen. Can you suggest an easy recipe I can make with these ingredients?",
    "Can you explain how the QuickSort algorithm works and provide a Python implementation?",
    "What are some unique features of Rust that make it stand out compared to other systems programming languages like C++?",
]
examples = [[question, None, None, None, None, None] for question in _example_prompts]
# Assemble the chat UI and start serving it.
_chatbot = gr.Chatbot(
    show_label=False,
    show_share_button=False,
    show_copy_button=True,
    likeable=True,
    layout="panel",
)
_demo = gr.ChatInterface(
    fn=generate,
    chatbot=_chatbot,
    additional_inputs=additional_inputs,
    title="qwen14b",
    examples=examples,
    concurrency_limit=20,
)
_demo.launch(show_api=False)