Commit d27a61e (verified) · 0 parent(s)
Super-squash history to reclaim storage
- .gitattributes +72 -0
- README.md +264 -0
- car-1.jpg +3 -0
- google_gemma-3-12b-it-bf16-q8.gguf +3 -0
- google_gemma-3-12b-it-bf16.gguf +3 -0
- google_gemma-3-12b-it-f16-q8.gguf +3 -0
- google_gemma-3-12b-it-iq2_m.gguf +3 -0
- google_gemma-3-12b-it-iq2_s.gguf +3 -0
- google_gemma-3-12b-it-iq2_xs.gguf +3 -0
- google_gemma-3-12b-it-iq3_m.gguf +3 -0
- google_gemma-3-12b-it-iq3_s.gguf +3 -0
- google_gemma-3-12b-it-iq3_xs.gguf +3 -0
- google_gemma-3-12b-it-iq4_nl.gguf +3 -0
- google_gemma-3-12b-it-iq4_xs.gguf +3 -0
- google_gemma-3-12b-it-mmproj-bf16.gguf +3 -0
- google_gemma-3-12b-it-mmproj-f16.gguf +3 -0
- google_gemma-3-12b-it-mmproj-f32.gguf +3 -0
- google_gemma-3-12b-it-mmproj-q8_0.gguf +3 -0
- google_gemma-3-12b-it-q2_k_s.guuf +3 -0
- google_gemma-3-12b-it-q3_k_m.gguf +3 -0
- google_gemma-3-12b-it-q3_k_s.gguf +3 -0
- google_gemma-3-12b-it-q4_0.gguf +3 -0
- google_gemma-3-12b-it-q4_1.gguf +3 -0
- google_gemma-3-12b-it-q4_k_m.gguf +3 -0
- google_gemma-3-12b-it-q4_k_s.gguf +3 -0
- google_gemma-3-12b-it-q5_k_m.gguf +3 -0
- google_gemma-3-12b-it-q5_k_s.gguf +3 -0
- google_gemma-3-12b-it-q6_k_m.gguf +3 -0
- google_gemma-3-12b-it-q8.gguf +3 -0
- google_gemma-3-12b-it-tq1_0.gguf +3 -0
- google_gemma-3-12b-it-tq2_0.gguf +3 -0
.gitattributes
ADDED
@@ -0,0 +1,72 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-q3_k_l.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-mmproj-bf16.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-q6_k_m.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-q4_k_l.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-f16-q8.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-mmproj-q8_0.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-q5_k_l.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-q4_k_s.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-q3_k_s.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-q6_k_l.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-q5_k_s.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-bf16.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-q4_1.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-q4_0.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-iq4_xs.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-iq3_xs.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-q8.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-q3_k_m.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-mmproj-f32.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-q4_k_m.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-bf16-q8.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-mmproj-f16.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-q5_k_m.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-iq4_nl.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-q2_k_l.gguf filter=lfs diff=lfs merge=lfs -text
car-1.jpg filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-iq2_m.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-iq1_s.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-tq2_0.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-iq3_m.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-tq1_0.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-iq3_s.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-iq2_s.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-iq1_m.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-f16.gguf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-q2_k_s.guuf filter=lfs diff=lfs merge=lfs -text
google_gemma-3-12b-it-iq2_xs.gguf filter=lfs diff=lfs merge=lfs -text
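
These entries are the standard Hugging Face LFS patterns plus one line per model file. For context, a line like the ones above is usually produced with `git lfs track` (shown only as an illustration of where such entries come from; the filename is one of the files in this repo):

```bash
# Register a weight file with Git LFS; this appends a matching line to .gitattributes
git lfs track "google_gemma-3-12b-it-q4_k_m.gguf"
git add .gitattributes
```
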
README.md
ADDED
@@ -0,0 +1,264 @@
---
license: gemma
pipeline_tag: image-text-to-text
tags:
- gemma
- vision
- image
- llama.cpp
---

# <span style="color: #7FFF7F;">Gemma-3 12B Instruct GGUF Models</span>


## How to Use Gemma 3 Vision with llama.cpp

To use the experimental Gemma 3 Vision support in `llama.cpp`, follow these steps:

1. **Clone the latest llama.cpp repository**:
```bash
git clone https://github.com/ggml-org/llama.cpp.git
cd llama.cpp
```


2. **Build llama.cpp**:

Build llama.cpp as usual: https://github.com/ggml-org/llama.cpp#building-the-project

Once llama.cpp is built, copy `./llama.cpp/build/bin/llama-gemma3-cli` to a folder of your choice.

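For reference, a minimal CPU-only build sketch is shown below (adjust the CMake flags for CUDA, Metal, etc.; the destination folder is just an example):

```bash
# Minimal CPU build of llama.cpp (run from inside the llama.cpp folder)
cmake -B build
cmake --build build --config Release -j
# The vision CLI then sits at build/bin/llama-gemma3-cli
cp build/bin/llama-gemma3-cli ~/gemma3/   # example destination folder
```
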
3. **Download the Gemma 3 gguf file**:

https://huggingface.co/Mungert/gemma-3-12b-it-gguf/tree/main

Choose a gguf file without mmproj in the name.

Example gguf file: https://huggingface.co/Mungert/gemma-3-12b-it-gguf/resolve/main/google_gemma-3-12b-it-q4_k_l.gguf

Copy this file to your chosen folder.

4. **Download the Gemma 3 mmproj file**:

https://huggingface.co/Mungert/gemma-3-12b-it-gguf/tree/main

Choose a file with mmproj in the name.

Example mmproj file: https://huggingface.co/Mungert/gemma-3-12b-it-gguf/resolve/main/google_gemma-3-12b-it-mmproj-bf16.gguf

Copy this file to your chosen folder.

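If you prefer the command line, both example files can be fetched with `huggingface-cli` (a sketch; it requires the `huggingface_hub` package, and the filenames are the example files above):

```bash
pip install -U "huggingface_hub[cli]"

# Download the example model and mmproj files into the current folder
huggingface-cli download Mungert/gemma-3-12b-it-gguf \
  google_gemma-3-12b-it-q4_k_l.gguf \
  google_gemma-3-12b-it-mmproj-bf16.gguf \
  --local-dir .
```
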
5. **Copy images to the same folder as the gguf files** (or adjust the paths appropriately).

In the example below, the gguf files, images and llama-gemma3-cli are in the same folder.

Example image: https://huggingface.co/Mungert/gemma-3-12b-it-gguf/resolve/main/car-1.jpg

Copy this file to your chosen folder.

6. **Run the CLI Tool**:

From your chosen folder:

```bash
llama-gemma3-cli -m google_gemma-3-12b-it-q4_k_l.gguf --mmproj google_gemma-3-12b-it-mmproj-bf16.gguf
```

```
Running in chat mode, available commands:
   /image <path>    load an image
   /clear           clear the chat history
   /quit or /exit   exit the program

> /image car-1.jpg
Encoding image car-1.jpg
Image encoded in 46305 ms
Image decoded in 19302 ms

> what is the image of
Here's a breakdown of what's in the image:

**Subject:** The primary subject is a black Porsche Panamera Turbo driving on a highway.

**Details:**

* **Car:** It's a sleek, modern Porsche Panamera Turbo, identifiable by its distinctive rear design, the "PORSCHE" lettering, and the "Panamera Turbo" badge. The license plate reads "CVC-911".
* **Setting:** The car is on a multi-lane highway, with a blurred background of trees, a distant building, and a cloudy sky. The lighting suggests it's either dusk or dawn.
* **Motion:** The image captures the car in motion, with a slight motion blur to convey speed.

**Overall Impression:** The image conveys a sense of speed, luxury, and power. It's a well-composed shot that highlights the car's design and performance.

Do you want me to describe any specific aspect of the image in more detail, or perhaps analyze its composition?
```

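For scripted, single-turn use, the same binary can usually be driven non-interactively (a sketch; flag support may vary between llama.cpp versions, so check `llama-gemma3-cli --help` on your build):

```bash
# One-shot run: describe an image without entering chat mode
llama-gemma3-cli -m google_gemma-3-12b-it-q4_k_l.gguf \
  --mmproj google_gemma-3-12b-it-mmproj-bf16.gguf \
  --image car-1.jpg \
  -p "Describe this image in two sentences."
```
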
# <span id="testllm" style="color: #7F7FFF;">🚀 If you find these models useful</span>

Please click like ❤️. I'd also really appreciate it if you could test my Network Monitor Assistant at 👉 [Network Monitor Assistant](https://readyforquantum.com).
💬 Click the **chat icon** (bottom right of the main and dashboard pages). Choose an LLM; toggle between the LLM types TurboLLM -> FreeLLM -> TestLLM.

### What I'm Testing
I'm experimenting with **function calling** against my network monitoring service, using small open-source models. The question I'm exploring is: how small can a model be and still function?
🟡 **TestLLM** – Runs **Phi-4-mini-instruct** (phi-4-mini-q4_0.gguf) with llama.cpp on 6 threads of a CPU VM. It takes about 15 seconds to load, inference is quite slow, and it only processes one user prompt at a time (still working on scaling!). If you're curious, I'd be happy to share how it works.

### The Other Available AI Assistants
🟢 **TurboLLM** – Uses **gpt-4o-mini**. Fast! Note: tokens are limited since OpenAI models are pricey, but you can [Login](https://readyforquantum.com) or [Download](https://readyforquantum.com/download/?utm_source=huggingface&utm_medium=referral&utm_campaign=huggingface_repo_readme) the Quantum Network Monitor agent to get more tokens; alternatively, use the TestLLM.
🔵 **HugLLM** – Runs **open-source Hugging Face models**. Fast, but uses small models (≈8B), hence lower quality. Get 2x more tokens (subject to Hugging Face API availability).


## **Choosing the Right Model Format**

Selecting the correct model format depends on your **hardware capabilities** and **memory constraints**.

### **BF16 (Brain Float 16) – Use if BF16 acceleration is available**
- A 16-bit floating-point format designed for **faster computation** while retaining good precision.
- Provides a **similar dynamic range** to FP32 but with **lower memory usage**.
- Recommended if your hardware supports **BF16 acceleration** (check your device’s specs).
- Ideal for **high-performance inference** with a **reduced memory footprint** compared to FP32.

📌 **Use BF16 if:**
✔ Your hardware has native **BF16 support** (e.g., newer GPUs, TPUs).
✔ You want **higher precision** while saving memory.
✔ You plan to **requantize** the model into another format.

📌 **Avoid BF16 if:**
❌ Your hardware does **not** support BF16 (it may fall back to FP32 and run slower).
❌ You need compatibility with older devices that lack BF16 optimization.

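As a rough way to check for native BF16 support on a Linux CPU (an illustrative sketch; for GPUs, check the vendor's documentation instead):

```bash
# Look for BF16-capable CPU flags on Linux (avx512_bf16 / amx_bf16); no output means no native support
grep -oE 'avx512_bf16|amx_bf16' /proc/cpuinfo | sort -u
```
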
---

### **F16 (Float 16) – More widely supported than BF16**
- A 16-bit floating-point format with **high precision**, but a smaller range of values than BF16.
- Works on most devices with **FP16 acceleration support** (including many GPUs and some CPUs).
- Slightly lower numerical precision than BF16 but generally sufficient for inference.

📌 **Use F16 if:**
✔ Your hardware supports **FP16** but **not BF16**.
✔ You need a **balance between speed, memory usage, and accuracy**.
✔ You are running on a **GPU** or another device optimized for FP16 computations.

📌 **Avoid F16 if:**
❌ Your device lacks **native FP16 support** (it may run slower than expected).
❌ You have memory limitations.

---

### **Quantized Models (Q4_K, Q6_K, Q8, etc.) – For CPU & Low-VRAM Inference**
Quantization reduces model size and memory usage while maintaining as much accuracy as possible.
- **Lower-bit models (Q4_K)** → **Best for minimal memory usage**, may have lower precision.
- **Higher-bit models (Q6_K, Q8_0)** → **Better accuracy**, require more memory.

📌 **Use Quantized Models if:**
✔ You are running inference on a **CPU** and need an optimized model.
✔ Your device has **low VRAM** and cannot load full-precision models.
✔ You want to reduce the **memory footprint** while keeping reasonable accuracy.

📌 **Avoid Quantized Models if:**
❌ You need **maximum accuracy** (full-precision models are better for this).
❌ Your hardware has enough VRAM for higher-precision formats (BF16/F16).

---

### **Summary Table: Model Format Selection**

| Model Format | Precision | Memory Usage | Device Requirements | Best Use Case |
|--------------|-----------|--------------|---------------------|---------------|
| **BF16** | Highest | High | BF16-supported GPU/CPUs | High-speed inference with reduced memory |
| **F16** | High | High | FP16-supported devices | GPU inference when BF16 isn’t available |
| **Q4_K** | Low | Very Low | CPU or low-VRAM devices | Best for memory-constrained environments |
| **Q6_K** | Medium-Low | Low | CPU with more memory | Better accuracy while still being quantized |
| **Q8** | Medium | Moderate | CPU or GPU with enough VRAM | Best accuracy among quantized models |

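For plain text-only CPU inference with one of the quantized files, `llama-cli` from the same llama.cpp build is a reasonable starting point (a sketch; pick the quant and thread count to match your machine):

```bash
# Text-only CPU run of a quantized file; -t sets the number of CPU threads
./build/bin/llama-cli -m google_gemma-3-12b-it-q4_k_m.gguf -t 8 \
  -p "Summarize what GGUF quantization is in two sentences."
```
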
## **Included Files & Details**

### `google_gemma-3-12b-it-bf16.gguf`
- Model weights preserved in **BF16**.
- Use this if you want to **requantize** the model into a different format (see the sketch below).
- Best if your device supports **BF16 acceleration**.

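Requantizing from this BF16 file can be done with the `llama-quantize` tool that ships with llama.cpp (an illustrative sketch; the output filename and target quant type are up to you):

```bash
# Requantize the BF16 weights to Q4_K_M (an example target type)
./build/bin/llama-quantize google_gemma-3-12b-it-bf16.gguf google_gemma-3-12b-it-q4_k_m.gguf Q4_K_M
```
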
### `google_gemma-3-12b-it-f16.gguf`
- Model weights stored in **F16**.
- Use if your device supports **FP16**, especially if BF16 is not available.

### `google_gemma-3-12b-it-bf16-q8.gguf`
- **Output & embeddings** remain in **BF16**.
- All other layers quantized to **Q8_0**.
- Use if your device supports **BF16** and you want a quantized version.

### `google_gemma-3-12b-it-f16-q8.gguf`
- **Output & embeddings** remain in **F16**.
- All other layers quantized to **Q8_0**.

### `google_gemma-3-12b-it-q4_k_l.gguf`
- **Output & embeddings** quantized to **Q8_0**.
- All other layers quantized to **Q4_K**.
- Good for **CPU inference** with limited memory.

### `google_gemma-3-12b-it-q4_k_m.gguf`
- Similar to Q4_K.
- Another option for **low-memory CPU inference**.

### `google_gemma-3-12b-it-q4_k_s.gguf`
- Smallest **Q4_K** variant, using less memory at the cost of accuracy.
- Best for **very low-memory setups**.

### `google_gemma-3-12b-it-q6_k_l.gguf`
- **Output & embeddings** quantized to **Q8_0**.
- All other layers quantized to **Q6_K**.

### `google_gemma-3-12b-it-q6_k_m.gguf`
- A mid-range **Q6_K** quantized model for balanced performance.
- Suitable for **CPU-based inference** with **moderate memory**.

### `google_gemma-3-12b-it-q8.gguf`
- Fully **Q8** quantized model for better accuracy.
- Requires **more memory** but offers higher precision.

# Gemma 3 model card

**Model Page**: [Gemma](https://ai.google.dev/gemma/docs/core)

**Resources and Technical Documentation**:

* [Gemma 3 Technical Report][g3-tech-report]
* [Responsible Generative AI Toolkit][rai-toolkit]
* [Gemma on Kaggle][kaggle-gemma]
* [Gemma on Vertex Model Garden][vertex-mg-gemma3]

**Terms of Use**: [Terms][terms]

**Authors**: Google DeepMind

## Model Information

Summary description and brief definition of inputs and outputs.

### Description

Gemma is a family of lightweight, state-of-the-art open models from Google,
built from the same research and technology used to create the Gemini models.
Gemma 3 models are multimodal, handling text and image input and generating text
output, with open weights for both pre-trained variants and instruction-tuned
variants. Gemma 3 has a large, 128K context window, multilingual support in over
140 languages, and is available in more sizes than previous versions. Gemma 3
models are well-suited for a variety of text generation and image understanding
tasks, including question answering, summarization, and reasoning. Their
relatively small size makes it possible to deploy them in environments with
limited resources such as laptops, desktops or your own cloud infrastructure,
democratizing access to state of the art AI models and helping foster innovation
for everyone.

### Inputs and outputs

- **Input:**
    - Text string, such as a question, a prompt, or a document to be summarized
    - Images, normalized to 896 x 896 resolution and encoded to 256 tokens each
    - Total input context of 128K tokens for the 4B, 12B, and 27B sizes, and 32K tokens for the 1B size

- **Output:**
    - Generated text in response to the input, such as an answer to a question, analysis of image content, or a summary of a document
    - Total output context of 8192 tokens
car-1.jpg
ADDED
(binary image, stored with Git LFS; preview not shown)
google_gemma-3-12b-it-bf16-q8.gguf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3150fa074c3840afd7963ad669f740529ad737b8199a8867bcba938479a24fe3
size 13453668352
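
Note that the three lines above are a Git LFS pointer, not the weights themselves. If you clone the repo with git, a single payload can be fetched selectively (a sketch, assuming `git-lfs` is installed):

```bash
# Clone without downloading LFS payloads, then pull just one file
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/Mungert/gemma-3-12b-it-gguf
cd gemma-3-12b-it-gguf
git lfs pull --include="google_gemma-3-12b-it-bf16-q8.gguf"
```
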
google_gemma-3-12b-it-bf16.gguf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1491e21628cd5318eee2fb810bd3fbad6b76cb5f1d8ca03ac62e344a513c4280
size 23539658496

google_gemma-3-12b-it-f16-q8.gguf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e295c862d99575f53362df887e9007f4c6f32cf59508109ee42be2defb489ca4
size 13453668352

google_gemma-3-12b-it-iq2_m.gguf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8594dde632b726cfd5f6de3dd44819b666234bc0b43c9f5fb86cf19ec5f0e429
size 4310290432

google_gemma-3-12b-it-iq2_s.gguf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:570901ac6e036c6410a41a6a148cabc9ab3920b441c03517849723e60649c5ff
size 4020539392

google_gemma-3-12b-it-iq2_xs.gguf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:207f5c239551130a0e3e1d07726476e7a8e536e651c7fec539b71fd3dead5ece
size 3840090112

google_gemma-3-12b-it-iq3_m.gguf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f6380054cb1c27ec380b4c922569e570753d100c90b0c879eb0a3c0ef8d5a5af
size 5655519232

google_gemma-3-12b-it-iq3_s.gguf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7c9e28031bbac1a07ae8af1f9c0a3828ef1a4bdc1fbd326b5d4dd72865e36438
size 5458112512

google_gemma-3-12b-it-iq3_xs.gguf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9a4718da6e7a65e971c149cc8ce1edc06ea8d714ab4c9545f676ff0b60647216
size 5205962752

google_gemma-3-12b-it-iq4_nl.gguf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:03e5bc7a192d66e5abd576ca81fb73dce6958fae5a4f68d0d5109a827bf72c2f
size 6886961152

google_gemma-3-12b-it-iq4_xs.gguf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bc48e90608ced0b545f39615f1c7065f3148a07afe92e24e02e1aa3c980c7914
size 6550761472

google_gemma-3-12b-it-mmproj-bf16.gguf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:87e5ba81aeddfc99bac8325a05f173b5f7024d447a1e3a21f40b265bdc0fe26f
size 854200224

google_gemma-3-12b-it-mmproj-f16.gguf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:30c02d056410848227001830866e0a269fcc28aaf8ca971bded494003de9f5a5
size 854200224

google_gemma-3-12b-it-mmproj-f32.gguf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d45f67b21a6718c2f418962ae2c9823ab0b6264ec9d4bad586f173143b8d17b6
size 1685188512

google_gemma-3-12b-it-mmproj-q8_0.gguf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fd46cb64556fbd4118a944b5b27a8398c5e7b6111957dd0ca7eda472fd0e1f81
size 590179104

google_gemma-3-12b-it-q2_k_s.guuf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ac2e6d9a130bbec01e73ac79968f2725032e1c35a4739cc81a3e61f71c9e252e
size 4448407552

google_gemma-3-12b-it-q3_k_m.gguf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:223c9d42291e15cccd93871d534b8fd842d9fe821417983b17e1c76f9d026b0b
size 6008614912

google_gemma-3-12b-it-q3_k_s.gguf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:139263cfc1a8f13e305f9df2475fe9a85a17e5d54a54cc8e64a8bca091659274
size 5458112512

google_gemma-3-12b-it-q4_0.gguf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c0d067ff720a59326f63b2ee5d6b416c029e0f09f436b6ff4e9825629c09e484
size 7130755072

google_gemma-3-12b-it-q4_1.gguf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:51f715066755a839b2a4dcaf0c33ed812b10d8ab7fc1fbea3c937cb71f4e3fa1
size 7803154432

google_gemma-3-12b-it-q4_k_m.gguf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1db0ccbe6e12e3b5ab7f3c23c4dc290d06f2cf2987cca4cc83a0f67e4dc7ded2
size 7300575232

google_gemma-3-12b-it-q4_k_s.gguf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f7e7dabc065cf9d2096601c23e8986976dceec29bba478c27afe064dd8b83bf1
size 6935130112

google_gemma-3-12b-it-q5_k_m.gguf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:10fa539214cbbca169c5264f08fafe944625f41ba0a4c49ee8f25fb2b0a4c7b9
size 8444833792

google_gemma-3-12b-it-q5_k_s.gguf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:426e15f8063333a47d8b9419a408b3160f548d36286b2595a6ee6d50ac90117b
size 8231759872

google_gemma-3-12b-it-q6_k_m.gguf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:795df22e08ebe7e99b91c19203c5fa5dbfc021f0d1f46d008f99a859efb15ef3
size 9660608512

google_gemma-3-12b-it-q8.gguf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:67773ea1deafd3d43df4052a7b19220987edd9817bb3b0f17001586b75a38abd
size 12509949696

google_gemma-3-12b-it-tq1_0.gguf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:19217da4f71089754ca56b2755ad1d8037a690f1fa6d9ab37418189195bb1b5c
size 3104714752

google_gemma-3-12b-it-tq2_0.gguf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6ad2f2aaa4f599829583995cd2d9a671e672667dc08e5c3bb58dc625b901dd45
size 3609014272