Set Sail
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -14,19 +14,24 @@ import gradio as gr
|
|
14 |
today_date = datetime.today().strftime("%B %-d, %Y") # noqa: DTZ002
|
15 |
|
16 |
SYS_PROMPT = f"""Today's Date: {today_date}.
|
17 |
-
You are
|
18 |
TITLE = "Gemma3 1b instruct IQ4_XS from local GGUF server using BPP library."
|
|
|
19 |
DESCRIPTION = """
|
20 |
<p>Gemma3 1b instruct is an open-source LLM supporting a 128k context window. This demo uses only 2K context.
|
21 |
</p>
|
22 |
<p> The BPP library implements matrix multiplication with far less multiplications.
|
23 |
</p>
|
|
|
|
|
24 |
<p> Original space by TobDeBers, and GGUF by Bartowski. Not sure it's the idea, but we noticed IQ4_XS runs faster.
|
25 |
</p>
|
26 |
<p> <b>Running on CPU, please be patient!</b>
|
27 |
</p>
|
28 |
|
29 |
"""
|
|
|
|
|
30 |
LLAMA_CPP_SERVER = "http://127.0.0.1:8081"
|
31 |
MAX_NEW_TOKENS = 512
|
32 |
TEMPERATURE = 1
|
@@ -183,6 +188,7 @@ with gr.Blocks(fill_height=True, css_paths=css_file_path, theme=theme, title=TIT
|
|
183 |
repetition_penalty_slider,
|
184 |
top_p_slider,
|
185 |
top_k_slider,
|
|
|
186 |
max_new_tokens_slider,
|
187 |
],
|
188 |
additional_inputs_accordion=chat_interface_accordion,
|
|
|
14 |
today_date = datetime.today().strftime("%B %-d, %Y") # noqa: DTZ002
|
15 |
|
16 |
SYS_PROMPT = f"""Today's Date: {today_date}.
|
17 |
+
You are Gemma3, developed by Google. You are a helpful AI assistant"""
|
18 |
TITLE = "Gemma3 1b instruct IQ4_XS from local GGUF server using BPP library."
|
19 |
+
# Added a suggestion for users to duplicate the space
|
20 |
DESCRIPTION = """
|
21 |
<p>Gemma3 1b instruct is an open-source LLM supporting a 128k context window. This demo uses only 2K context.
|
22 |
</p>
|
23 |
<p> The BPP library implements matrix multiplication with far less multiplications.
|
24 |
</p>
|
25 |
+
<p> <b><u> It will run much faster if you duplicate this space for your own use</u></b>
|
26 |
+
</p>
|
27 |
<p> Original space by TobDeBers, and GGUF by Bartowski. Not sure it's the idea, but we noticed IQ4_XS runs faster.
|
28 |
</p>
|
29 |
<p> <b>Running on CPU, please be patient!</b>
|
30 |
</p>
|
31 |
|
32 |
"""
|
33 |
+
|
34 |
+
|
35 |
LLAMA_CPP_SERVER = "http://127.0.0.1:8081"
|
36 |
MAX_NEW_TOKENS = 512
|
37 |
TEMPERATURE = 1
|
|
|
188 |
repetition_penalty_slider,
|
189 |
top_p_slider,
|
190 |
top_k_slider,
|
191 |
+
min_p_slider, #added input for min_p
|
192 |
max_new_tokens_slider,
|
193 |
],
|
194 |
additional_inputs_accordion=chat_interface_accordion,
|