Set Sail committed on
Commit
869e110
·
verified ·
1 Parent(s): 8038988

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -1
app.py CHANGED
@@ -14,19 +14,24 @@ import gradio as gr
14
  today_date = datetime.today().strftime("%B %-d, %Y") # noqa: DTZ002
15
 
16
  SYS_PROMPT = f"""Today's Date: {today_date}.
17
- You are Gemma, developed by Google. You are a helpful AI assistant"""
18
  TITLE = "Gemma3 1b instruct IQ4_XS from local GGUF server using BPP library."
 
19
  DESCRIPTION = """
20
  <p>Gemma3 1b instruct is an open-source LLM supporting a 128k context window. This demo uses only 2K context.
21
  </p>
22
  <p> The BPP library implements matrix multiplication with far less multiplications.
23
  </p>
 
 
24
  <p> Original space by TobDeBers, and GGUF by Bartowski. Not sure it's the idea, but we noticed IQ4_XS runs faster.
25
  </p>
26
  <p> <b>Running on CPU, please be patient!</b>
27
  </p>
28
 
29
  """
 
 
30
  LLAMA_CPP_SERVER = "http://127.0.0.1:8081"
31
  MAX_NEW_TOKENS = 512
32
  TEMPERATURE = 1
@@ -183,6 +188,7 @@ with gr.Blocks(fill_height=True, css_paths=css_file_path, theme=theme, title=TIT
183
  repetition_penalty_slider,
184
  top_p_slider,
185
  top_k_slider,
 
186
  max_new_tokens_slider,
187
  ],
188
  additional_inputs_accordion=chat_interface_accordion,
 
14
  today_date = datetime.today().strftime("%B %-d, %Y") # noqa: DTZ002
15
 
16
  SYS_PROMPT = f"""Today's Date: {today_date}.
17
+ You are Gemma3, developed by Google. You are a helpful AI assistant"""
18
  TITLE = "Gemma3 1b instruct IQ4_XS from local GGUF server using BPP library."
19
+ # Added a suggestion for users to duplicate the space
20
  DESCRIPTION = """
21
  <p>Gemma3 1b instruct is an open-source LLM supporting a 128k context window. This demo uses only 2K context.
22
  </p>
23
  <p> The BPP library implements matrix multiplication with far less multiplications.
24
  </p>
25
+ <p> <b><u> It will run much faster if you duplicate this space for your own use</u></b>
26
+ </p>
27
  <p> Original space by TobDeBers, and GGUF by Bartowski. Not sure it's the idea, but we noticed IQ4_XS runs faster.
28
  </p>
29
  <p> <b>Running on CPU, please be patient!</b>
30
  </p>
31
 
32
  """
33
+
34
+
35
  LLAMA_CPP_SERVER = "http://127.0.0.1:8081"
36
  MAX_NEW_TOKENS = 512
37
  TEMPERATURE = 1
 
188
  repetition_penalty_slider,
189
  top_p_slider,
190
  top_k_slider,
191
+ min_p_slider, #added input for min_p
192
  max_new_tokens_slider,
193
  ],
194
  additional_inputs_accordion=chat_interface_accordion,