sitammeur committed
Commit 8ec0364 (verified) · 1 parent: 6d26dbf

Update app.py: drop gr.ChatInterface's retry_btn, undo_btn, and clear_btn arguments, which Gradio 5's ChatInterface no longer accepts, and add stop_btn="Stop".

Files changed (1)
  1. app.py +188 -190
app.py CHANGED
@@ -1,190 +1,188 @@
 # Importing required libraries
 import json
 import subprocess
 from llama_cpp import Llama
 from llama_cpp_agent import LlamaCppAgent
 from llama_cpp_agent import MessagesFormatterType
 from llama_cpp_agent.providers import LlamaCppPythonProvider
 from llama_cpp_agent.chat_history import BasicChatHistory
 from llama_cpp_agent.chat_history.messages import Roles
 import gradio as gr
 from huggingface_hub import hf_hub_download


 # Download gguf model files
 llm = None
 llm_model = None

 hf_hub_download(
     repo_id="bartowski/Dolphin3.0-Llama3.2-1B-GGUF",
     filename="Dolphin3.0-Llama3.2-1B-Q6_K.gguf",
     local_dir="./models",
 )
 hf_hub_download(
     repo_id="bartowski/Dolphin3.0-Qwen2.5-1.5B-GGUF",
     filename="Dolphin3.0-Qwen2.5-1.5B-Q6_K.gguf",
     local_dir="./models",
 )
 hf_hub_download(
     repo_id="bartowski/Dolphin3.0-Qwen2.5-0.5B-GGUF",
     filename="Dolphin3.0-Qwen2.5-0.5B-Q6_K.gguf",
     local_dir="./models",
 )

 # Set the title and description
 title = "Dolphin-3 Llama.cpp 🐬"
 description = """Dolphin 3.0 is a powerful, general-purpose local AI model designed for coding, math, and various other tasks, similar in aim to models like ChatGPT and Claude."""
 examples = [
     ["Provide a historical fact about the capital of France."],
     ["Suggest a three-day itinerary for exploring Tokyo and nearby areas."],
     ["Can you write a short, imaginative story about exploring a forest?"],
 ]


 def respond(
     message,
     history: list[tuple[str, str]],
     model,
     system_message,
     max_tokens,
     temperature,
     top_p,
     top_k,
     repeat_penalty,
 ):
     """
     Respond to a message using the Dolphin-3 model via Llama.cpp.

     Args:
         - message (str): The message to respond to.
         - history (list[tuple[str, str]]): The chat history.
         - model (str): The model to use.
         - max_tokens (int): The maximum number of tokens to generate.
         - temperature (float): The temperature of the model.
         - top_p (float): The top-p of the model.
         - top_k (int): The top-k of the model.
         - repeat_penalty (float): The repetition penalty of the model.

     Returns:
         str: The response to the message.
     """
     # Load the global variables
     global llm
     global llm_model

     # Load the model (reloaded only when the dropdown selection changes)
     if llm is None or llm_model != model:
         llm = Llama(
             model_path=f"models/{model}",
             flash_attn=False,
             n_gpu_layers=0,
             n_batch=32,
             n_ctx=8192,
         )
         llm_model = model
     provider = LlamaCppPythonProvider(llm)

     # Create the agent
     agent = LlamaCppAgent(
         provider,
         system_prompt=f"{system_message}",
         predefined_messages_formatter_type=MessagesFormatterType.CHATML,
         debug_output=True,
     )

     # Set the settings like temperature, top-k, top-p, max tokens, etc.
     settings = provider.get_provider_default_settings()
     settings.temperature = temperature
     settings.top_k = top_k
     settings.top_p = top_p
     settings.max_tokens = max_tokens
     settings.repeat_penalty = repeat_penalty
     settings.stream = True

     messages = BasicChatHistory()

     # Add the chat history
     for msg in history:
         user = {"role": Roles.user, "content": msg[0]}
         assistant = {"role": Roles.assistant, "content": msg[1]}
         messages.add_message(user)
         messages.add_message(assistant)

     # Get the response stream
     stream = agent.get_chat_response(
         message,
         llm_sampling_settings=settings,
         chat_history=messages,
         returns_streaming_generator=True,
         print_output=False,
     )

     # Generate the response
     outputs = ""
     for output in stream:
         outputs += output
         yield outputs


 # Create a chat interface
 demo = gr.ChatInterface(
     respond,
     additional_inputs_accordion=gr.Accordion(
         label="⚙️ Parameters", open=False, render=False
     ),
     additional_inputs=[
         gr.Dropdown(
             [
                 "Dolphin3.0-Llama3.2-1B-Q6_K.gguf",
                 "Dolphin3.0-Qwen2.5-1.5B-Q6_K.gguf",
                 "Dolphin3.0-Qwen2.5-0.5B-Q6_K.gguf",
             ],
             value="Dolphin3.0-Llama3.2-1B-Q6_K.gguf",
             label="Model",
         ),
         gr.Textbox(
             value="You are Dolphin, an AI assistant that helps humanity.",
             label="System message",
         ),
         gr.Slider(minimum=1, maximum=8192, value=8192, step=1, label="Max tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(
             minimum=0.1,
             maximum=1.0,
             value=0.95,
             step=0.05,
             label="Top-p",
         ),
         gr.Slider(
             minimum=0,
             maximum=100,
             value=40,
             step=1,
             label="Top-k",
         ),
         gr.Slider(
             minimum=0.0,
             maximum=2.0,
             value=1.1,
             step=0.1,
             label="Repetition penalty",
         ),
     ],
     theme="Glass",
-    retry_btn="Retry",
-    undo_btn="Undo",
-    clear_btn="Clear",
     submit_btn="Send",
+    stop_btn="Stop",
     title=title,
     description=description,
     chatbot=gr.Chatbot(scale=1, show_copy_button=True),
     examples=examples,
     cache_examples=True,
     cache_mode="lazy",
     flagging_mode="never",
 )


 # Launch the chat interface
 if __name__ == "__main__":
     demo.launch(debug=False)
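
For reference, here is a minimal standalone sketch of the new button configuration, assuming Gradio 5's gr.ChatInterface API (where retry_btn, undo_btn, and clear_btn were removed and submit_btn/stop_btn accept a label string or a bool). The echo generator is a hypothetical stand-in for the respond() streamer above, not part of the commit:

import gradio as gr

def echo(message, history):
    # Hypothetical stand-in for the streaming respond() generator above.
    yield f"You said: {message}"

demo = gr.ChatInterface(
    echo,
    submit_btn="Send",  # label for the send button
    stop_btn="Stop",    # lets the user interrupt a streaming reply
)

if __name__ == "__main__":
    demo.launch()

In Gradio 5 the retry, undo, and clear actions are built into the chatbot component itself, which is presumably why this commit deletes those three arguments rather than renaming them.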