TobDeBers_BPP_Gemma3_1b

Paused

Set Sail committed on Mar 27

Commit

869e110

verified ·

1 Parent(s): 8038988

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -14,19 +14,24 @@ import gradio as gr
 today_date = datetime.today().strftime("%B %-d, %Y")  # noqa: DTZ002
 SYS_PROMPT = f"""Today's Date: {today_date}.
-You are Gemma, developed by Google. You are a helpful AI assistant"""
 TITLE = "Gemma3 1b instruct IQ4_XS from local GGUF server using BPP library."
 DESCRIPTION = """
 <p>Gemma3 1b instruct is an open-source LLM supporting a 128k context window. This demo uses only 2K context.
 </p>
 <p> The BPP library implements matrix multiplication with far less multiplications.
 </p>
 <p> Original space by TobDeBers, and GGUF by Bartowski. Not sure it's the idea, but we noticed IQ4_XS runs faster.
 </p>
 <p> <b>Running on CPU, please be patient!</b>
 </p>
 """
 LLAMA_CPP_SERVER = "http://127.0.0.1:8081"
 MAX_NEW_TOKENS = 512
 TEMPERATURE = 1
@@ -183,6 +188,7 @@ with gr.Blocks(fill_height=True, css_paths=css_file_path, theme=theme, title=TIT
             repetition_penalty_slider,
             top_p_slider,
             top_k_slider,
             max_new_tokens_slider,
         ],
         additional_inputs_accordion=chat_interface_accordion,

 today_date = datetime.today().strftime("%B %-d, %Y")  # noqa: DTZ002
 SYS_PROMPT = f"""Today's Date: {today_date}.
+You are Gemma3, developed by Google. You are a helpful AI assistant"""
 TITLE = "Gemma3 1b instruct IQ4_XS from local GGUF server using BPP library."
+# Added a suggestion for users to duplicate the space
 DESCRIPTION = """
 <p>Gemma3 1b instruct is an open-source LLM supporting a 128k context window. This demo uses only 2K context.
 </p>
 <p> The BPP library implements matrix multiplication with far less multiplications.
 </p>
+<p> <b><u> It will run much faster if you duplicate this space for your own use</u></b>
+</p>
 <p> Original space by TobDeBers, and GGUF by Bartowski. Not sure it's the idea, but we noticed IQ4_XS runs faster.
 </p>
 <p> <b>Running on CPU, please be patient!</b>
 </p>
 """
 LLAMA_CPP_SERVER = "http://127.0.0.1:8081"
 MAX_NEW_TOKENS = 512
 TEMPERATURE = 1
             repetition_penalty_slider,
             top_p_slider,
             top_k_slider,
+            min_p_slider, #added input for min_p
             max_new_tokens_slider,
         ],
         additional_inputs_accordion=chat_interface_accordion,