Update app.py
app.py CHANGED
@@ -37,7 +37,8 @@ if not os.path.exists("./models/google.gemma-3-12b-pt.Q4_K_M.gguf"):
     download_model("DevQuasar/google.gemma-3-12b-pt-GGUF", "google.gemma-3-12b-pt.Q4_K_M.gguf")
 if not os.path.exists("./models/google.gemma-3-4b-pt.Q4_K_M.gguf"): # Example from original, in case needed.
     download_model("DevQuasar/google.gemma-3-4b-pt-GGUF", "google.gemma-3-4b-pt.Q4_K_M.gguf")
-
+if not os.path.exists("./models/google.gemma-3-27b-pt.Q4_K_M.gguf"): # Example from original, in case needed.
+    download_model("DevQuasar/google.gemma-3-27b-pt-GGUF", "google.gemma-3-27b-pt.Q4_K_M.gguf")
 
 # Set the title and description
 title = "Gemma Text Generation"
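Note: the diff only shows the call sites of download_model; the helper itself is defined elsewhere in app.py. A minimal sketch of what such a helper could look like, assuming huggingface_hub is the underlying downloader (the function body below is an assumption, not taken from this commit):

import os
from huggingface_hub import hf_hub_download  # assumed dependency

def download_model(repo_id: str, filename: str, local_dir: str = "./models") -> str:
    """Fetch a GGUF file from the Hugging Face Hub into ./models if not already present."""
    os.makedirs(local_dir, exist_ok=True)
    # hf_hub_download reuses its cache, so repeated startups do not re-download the file.
    return hf_hub_download(repo_id=repo_id, filename=filename, local_dir=local_dir)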
@@ -87,8 +88,6 @@ def generate_text(
         flash_attn=True,
         n_gpu_layers=999, # Adjust based on your GPU availability
         n_ctx=4096, # Context window size. Can increase.
-        n_threads=4, # Adjust as needed for performance.
-        n_threads_batch=4,
         verbose=False # Reduce unnecessary verbosity
     )
     llm_model = model
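Note: with n_threads and n_threads_batch removed, the constructor falls back to llama-cpp-python's default thread handling. A sketch of the resulting Llama(...) call after this change; the model_path value is filled in here for illustration only, since app.py derives it from the selected model:

from llama_cpp import Llama

model = Llama(
    model_path="./models/google.gemma-3-1b-pt.Q4_K_M.gguf",  # illustrative path
    flash_attn=True,
    n_gpu_layers=999,  # offload as many layers as the GPU can hold
    n_ctx=4096,        # context window size
    verbose=False,     # reduce unnecessary verbosity
)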
@@ -144,6 +143,7 @@ with gr.Blocks(theme="Ocean", title=title) as demo:
|
|
| 144 |
"google.gemma-3-1b-pt.Q4_K_M.gguf",
|
| 145 |
"google.gemma-3-4b-pt.Q4_K_M.gguf",
|
| 146 |
"google.gemma-3-12b-pt.Q4_K_M.gguf",
|
|
|
|
| 147 |
# Add other models as needed and downloaded
|
| 148 |
],
|
| 149 |
value="google.gemma-3-1b-pt.Q4_K_M.gguf", # Default model
|
|
|
|
| 37 |
download_model("DevQuasar/google.gemma-3-12b-pt-GGUF", "google.gemma-3-12b-pt.Q4_K_M.gguf")
|
| 38 |
if not os.path.exists("./models/google.gemma-3-4b-pt.Q4_K_M.gguf"): # Example from original, in case needed.
|
| 39 |
download_model("DevQuasar/google.gemma-3-4b-pt-GGUF", "google.gemma-3-4b-pt.Q4_K_M.gguf")
|
| 40 |
+
if not os.path.exists("./models/google.gemma-3-27b-pt.Q4_K_M.gguf"): # Example from original, in case needed.
|
| 41 |
+
download_model("DevQuasar/google.gemma-3-27b-pt-GGUF", "google.gemma-3-27b-pt.Q4_K_M.gguf")
|
| 42 |
|
| 43 |
# Set the title and description
|
| 44 |
title = "Gemma Text Generation"
|
|
|
|
| 88 |
flash_attn=True,
|
| 89 |
n_gpu_layers=999, # Adjust based on your GPU availability
|
| 90 |
n_ctx=4096, # Context window size. Can increase.
|
|
|
|
|
|
|
| 91 |
verbose=False #Reduce unnecessary verbosity
|
| 92 |
)
|
| 93 |
llm_model = model
|
|
|
|
| 143 |
"google.gemma-3-1b-pt.Q4_K_M.gguf",
|
| 144 |
"google.gemma-3-4b-pt.Q4_K_M.gguf",
|
| 145 |
"google.gemma-3-12b-pt.Q4_K_M.gguf",
|
| 146 |
+
"google.gemma-3-27b-pt.Q4_K_M.gguf",
|
| 147 |
# Add other models as needed and downloaded
|
| 148 |
],
|
| 149 |
value="google.gemma-3-1b-pt.Q4_K_M.gguf", # Default model
|