pcuenq HF Staff commited on
Commit
d6141b8
·
verified ·
1 Parent(s): 9c3a84d

Upload config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.json +1361 -0
config.json ADDED
@@ -0,0 +1,1361 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Gemma3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "attn_logit_softcapping": null,
8
+ "bos_token_id": 2,
9
+ "cache_implementation": "hybrid",
10
+ "eos_token_id": [
11
+ 1,
12
+ 106
13
+ ],
14
+ "final_logit_softcapping": null,
15
+ "head_dim": 256,
16
+ "hidden_activation": "gelu_pytorch_tanh",
17
+ "hidden_size": 1152,
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 6912,
20
+ "max_position_embeddings": 32768,
21
+ "model_type": "gemma3_text",
22
+ "num_attention_heads": 4,
23
+ "num_hidden_layers": 26,
24
+ "num_key_value_heads": 1,
25
+ "pad_token_id": 0,
26
+ "quantization": {
27
+ "group_size": 64,
28
+ "bits": 2,
29
+ "model.embed_tokens": true,
30
+ "model.layers.0.self_attn.q_proj": {
31
+ "bits": 6,
32
+ "group_size": 64
33
+ },
34
+ "model.layers.0.self_attn.k_proj": true,
35
+ "model.layers.0.self_attn.v_proj": {
36
+ "bits": 6,
37
+ "group_size": 64
38
+ },
39
+ "model.layers.0.self_attn.o_proj": {
40
+ "bits": 6,
41
+ "group_size": 64
42
+ },
43
+ "model.layers.0.self_attn.q_norm": false,
44
+ "model.layers.0.self_attn.k_norm": false,
45
+ "model.layers.0.self_attn.rope": false,
46
+ "model.layers.0.mlp.gate_proj": {
47
+ "bits": 6,
48
+ "group_size": 64
49
+ },
50
+ "model.layers.0.mlp.down_proj": true,
51
+ "model.layers.0.mlp.up_proj": {
52
+ "bits": 6,
53
+ "group_size": 64
54
+ },
55
+ "model.layers.0.input_layernorm": false,
56
+ "model.layers.0.post_attention_layernorm": false,
57
+ "model.layers.0.pre_feedforward_layernorm": false,
58
+ "model.layers.0.post_feedforward_layernorm": false,
59
+ "model.layers.1.self_attn.q_proj": {
60
+ "bits": 6,
61
+ "group_size": 64
62
+ },
63
+ "model.layers.1.self_attn.k_proj": true,
64
+ "model.layers.1.self_attn.v_proj": {
65
+ "bits": 6,
66
+ "group_size": 64
67
+ },
68
+ "model.layers.1.self_attn.o_proj": {
69
+ "bits": 6,
70
+ "group_size": 64
71
+ },
72
+ "model.layers.1.self_attn.q_norm": false,
73
+ "model.layers.1.self_attn.k_norm": false,
74
+ "model.layers.1.self_attn.rope": false,
75
+ "model.layers.1.mlp.gate_proj": true,
76
+ "model.layers.1.mlp.down_proj": true,
77
+ "model.layers.1.mlp.up_proj": {
78
+ "bits": 6,
79
+ "group_size": 64
80
+ },
81
+ "model.layers.1.input_layernorm": false,
82
+ "model.layers.1.post_attention_layernorm": false,
83
+ "model.layers.1.pre_feedforward_layernorm": false,
84
+ "model.layers.1.post_feedforward_layernorm": false,
85
+ "model.layers.2.self_attn.q_proj": true,
86
+ "model.layers.2.self_attn.k_proj": true,
87
+ "model.layers.2.self_attn.v_proj": {
88
+ "bits": 6,
89
+ "group_size": 64
90
+ },
91
+ "model.layers.2.self_attn.o_proj": {
92
+ "bits": 6,
93
+ "group_size": 64
94
+ },
95
+ "model.layers.2.self_attn.q_norm": false,
96
+ "model.layers.2.self_attn.k_norm": false,
97
+ "model.layers.2.self_attn.rope": false,
98
+ "model.layers.2.mlp.gate_proj": true,
99
+ "model.layers.2.mlp.down_proj": true,
100
+ "model.layers.2.mlp.up_proj": {
101
+ "bits": 6,
102
+ "group_size": 64
103
+ },
104
+ "model.layers.2.input_layernorm": false,
105
+ "model.layers.2.post_attention_layernorm": false,
106
+ "model.layers.2.pre_feedforward_layernorm": false,
107
+ "model.layers.2.post_feedforward_layernorm": false,
108
+ "model.layers.3.self_attn.q_proj": true,
109
+ "model.layers.3.self_attn.k_proj": {
110
+ "bits": 6,
111
+ "group_size": 64
112
+ },
113
+ "model.layers.3.self_attn.v_proj": {
114
+ "bits": 6,
115
+ "group_size": 64
116
+ },
117
+ "model.layers.3.self_attn.o_proj": true,
118
+ "model.layers.3.self_attn.q_norm": false,
119
+ "model.layers.3.self_attn.k_norm": false,
120
+ "model.layers.3.self_attn.rope": false,
121
+ "model.layers.3.mlp.gate_proj": {
122
+ "bits": 6,
123
+ "group_size": 64
124
+ },
125
+ "model.layers.3.mlp.down_proj": true,
126
+ "model.layers.3.mlp.up_proj": true,
127
+ "model.layers.3.input_layernorm": false,
128
+ "model.layers.3.post_attention_layernorm": false,
129
+ "model.layers.3.pre_feedforward_layernorm": false,
130
+ "model.layers.3.post_feedforward_layernorm": false,
131
+ "model.layers.4.self_attn.q_proj": {
132
+ "bits": 6,
133
+ "group_size": 64
134
+ },
135
+ "model.layers.4.self_attn.k_proj": {
136
+ "bits": 6,
137
+ "group_size": 64
138
+ },
139
+ "model.layers.4.self_attn.v_proj": true,
140
+ "model.layers.4.self_attn.o_proj": true,
141
+ "model.layers.4.self_attn.q_norm": false,
142
+ "model.layers.4.self_attn.k_norm": false,
143
+ "model.layers.4.self_attn.rope": false,
144
+ "model.layers.4.mlp.gate_proj": {
145
+ "bits": 6,
146
+ "group_size": 64
147
+ },
148
+ "model.layers.4.mlp.down_proj": true,
149
+ "model.layers.4.mlp.up_proj": {
150
+ "bits": 6,
151
+ "group_size": 64
152
+ },
153
+ "model.layers.4.input_layernorm": false,
154
+ "model.layers.4.post_attention_layernorm": false,
155
+ "model.layers.4.pre_feedforward_layernorm": false,
156
+ "model.layers.4.post_feedforward_layernorm": false,
157
+ "model.layers.5.self_attn.q_proj": true,
158
+ "model.layers.5.self_attn.k_proj": {
159
+ "bits": 6,
160
+ "group_size": 64
161
+ },
162
+ "model.layers.5.self_attn.v_proj": {
163
+ "bits": 6,
164
+ "group_size": 64
165
+ },
166
+ "model.layers.5.self_attn.o_proj": {
167
+ "bits": 6,
168
+ "group_size": 64
169
+ },
170
+ "model.layers.5.self_attn.q_norm": false,
171
+ "model.layers.5.self_attn.k_norm": false,
172
+ "model.layers.5.self_attn.rope": false,
173
+ "model.layers.5.mlp.gate_proj": true,
174
+ "model.layers.5.mlp.down_proj": {
175
+ "bits": 6,
176
+ "group_size": 64
177
+ },
178
+ "model.layers.5.mlp.up_proj": true,
179
+ "model.layers.5.input_layernorm": false,
180
+ "model.layers.5.post_attention_layernorm": false,
181
+ "model.layers.5.pre_feedforward_layernorm": false,
182
+ "model.layers.5.post_feedforward_layernorm": false,
183
+ "model.layers.6.self_attn.q_proj": true,
184
+ "model.layers.6.self_attn.k_proj": {
185
+ "bits": 6,
186
+ "group_size": 64
187
+ },
188
+ "model.layers.6.self_attn.v_proj": {
189
+ "bits": 6,
190
+ "group_size": 64
191
+ },
192
+ "model.layers.6.self_attn.o_proj": {
193
+ "bits": 6,
194
+ "group_size": 64
195
+ },
196
+ "model.layers.6.self_attn.q_norm": false,
197
+ "model.layers.6.self_attn.k_norm": false,
198
+ "model.layers.6.self_attn.rope": false,
199
+ "model.layers.6.mlp.gate_proj": {
200
+ "bits": 6,
201
+ "group_size": 64
202
+ },
203
+ "model.layers.6.mlp.down_proj": true,
204
+ "model.layers.6.mlp.up_proj": {
205
+ "bits": 6,
206
+ "group_size": 64
207
+ },
208
+ "model.layers.6.input_layernorm": false,
209
+ "model.layers.6.post_attention_layernorm": false,
210
+ "model.layers.6.pre_feedforward_layernorm": false,
211
+ "model.layers.6.post_feedforward_layernorm": false,
212
+ "model.layers.7.self_attn.q_proj": true,
213
+ "model.layers.7.self_attn.k_proj": true,
214
+ "model.layers.7.self_attn.v_proj": {
215
+ "bits": 6,
216
+ "group_size": 64
217
+ },
218
+ "model.layers.7.self_attn.o_proj": {
219
+ "bits": 6,
220
+ "group_size": 64
221
+ },
222
+ "model.layers.7.self_attn.q_norm": false,
223
+ "model.layers.7.self_attn.k_norm": false,
224
+ "model.layers.7.self_attn.rope": false,
225
+ "model.layers.7.mlp.gate_proj": {
226
+ "bits": 6,
227
+ "group_size": 64
228
+ },
229
+ "model.layers.7.mlp.down_proj": {
230
+ "bits": 6,
231
+ "group_size": 64
232
+ },
233
+ "model.layers.7.mlp.up_proj": {
234
+ "bits": 6,
235
+ "group_size": 64
236
+ },
237
+ "model.layers.7.input_layernorm": false,
238
+ "model.layers.7.post_attention_layernorm": false,
239
+ "model.layers.7.pre_feedforward_layernorm": false,
240
+ "model.layers.7.post_feedforward_layernorm": false,
241
+ "model.layers.8.self_attn.q_proj": true,
242
+ "model.layers.8.self_attn.k_proj": {
243
+ "bits": 6,
244
+ "group_size": 64
245
+ },
246
+ "model.layers.8.self_attn.v_proj": {
247
+ "bits": 6,
248
+ "group_size": 64
249
+ },
250
+ "model.layers.8.self_attn.o_proj": {
251
+ "bits": 6,
252
+ "group_size": 64
253
+ },
254
+ "model.layers.8.self_attn.q_norm": false,
255
+ "model.layers.8.self_attn.k_norm": false,
256
+ "model.layers.8.self_attn.rope": false,
257
+ "model.layers.8.mlp.gate_proj": true,
258
+ "model.layers.8.mlp.down_proj": {
259
+ "bits": 6,
260
+ "group_size": 64
261
+ },
262
+ "model.layers.8.mlp.up_proj": {
263
+ "bits": 6,
264
+ "group_size": 64
265
+ },
266
+ "model.layers.8.input_layernorm": false,
267
+ "model.layers.8.post_attention_layernorm": false,
268
+ "model.layers.8.pre_feedforward_layernorm": false,
269
+ "model.layers.8.post_feedforward_layernorm": false,
270
+ "model.layers.9.self_attn.q_proj": true,
271
+ "model.layers.9.self_attn.k_proj": {
272
+ "bits": 6,
273
+ "group_size": 64
274
+ },
275
+ "model.layers.9.self_attn.v_proj": {
276
+ "bits": 6,
277
+ "group_size": 64
278
+ },
279
+ "model.layers.9.self_attn.o_proj": true,
280
+ "model.layers.9.self_attn.q_norm": false,
281
+ "model.layers.9.self_attn.k_norm": false,
282
+ "model.layers.9.self_attn.rope": false,
283
+ "model.layers.9.mlp.gate_proj": {
284
+ "bits": 6,
285
+ "group_size": 64
286
+ },
287
+ "model.layers.9.mlp.down_proj": {
288
+ "bits": 6,
289
+ "group_size": 64
290
+ },
291
+ "model.layers.9.mlp.up_proj": true,
292
+ "model.layers.9.input_layernorm": false,
293
+ "model.layers.9.post_attention_layernorm": false,
294
+ "model.layers.9.pre_feedforward_layernorm": false,
295
+ "model.layers.9.post_feedforward_layernorm": false,
296
+ "model.layers.10.self_attn.q_proj": {
297
+ "bits": 6,
298
+ "group_size": 64
299
+ },
300
+ "model.layers.10.self_attn.k_proj": {
301
+ "bits": 6,
302
+ "group_size": 64
303
+ },
304
+ "model.layers.10.self_attn.v_proj": true,
305
+ "model.layers.10.self_attn.o_proj": {
306
+ "bits": 6,
307
+ "group_size": 64
308
+ },
309
+ "model.layers.10.self_attn.q_norm": false,
310
+ "model.layers.10.self_attn.k_norm": false,
311
+ "model.layers.10.self_attn.rope": false,
312
+ "model.layers.10.mlp.gate_proj": {
313
+ "bits": 6,
314
+ "group_size": 64
315
+ },
316
+ "model.layers.10.mlp.down_proj": {
317
+ "bits": 6,
318
+ "group_size": 64
319
+ },
320
+ "model.layers.10.mlp.up_proj": true,
321
+ "model.layers.10.input_layernorm": false,
322
+ "model.layers.10.post_attention_layernorm": false,
323
+ "model.layers.10.pre_feedforward_layernorm": false,
324
+ "model.layers.10.post_feedforward_layernorm": false,
325
+ "model.layers.11.self_attn.q_proj": {
326
+ "bits": 6,
327
+ "group_size": 64
328
+ },
329
+ "model.layers.11.self_attn.k_proj": true,
330
+ "model.layers.11.self_attn.v_proj": {
331
+ "bits": 6,
332
+ "group_size": 64
333
+ },
334
+ "model.layers.11.self_attn.o_proj": {
335
+ "bits": 6,
336
+ "group_size": 64
337
+ },
338
+ "model.layers.11.self_attn.q_norm": false,
339
+ "model.layers.11.self_attn.k_norm": false,
340
+ "model.layers.11.self_attn.rope": false,
341
+ "model.layers.11.mlp.gate_proj": {
342
+ "bits": 6,
343
+ "group_size": 64
344
+ },
345
+ "model.layers.11.mlp.down_proj": true,
346
+ "model.layers.11.mlp.up_proj": true,
347
+ "model.layers.11.input_layernorm": false,
348
+ "model.layers.11.post_attention_layernorm": false,
349
+ "model.layers.11.pre_feedforward_layernorm": false,
350
+ "model.layers.11.post_feedforward_layernorm": false,
351
+ "model.layers.12.self_attn.q_proj": true,
352
+ "model.layers.12.self_attn.k_proj": {
353
+ "bits": 6,
354
+ "group_size": 64
355
+ },
356
+ "model.layers.12.self_attn.v_proj": true,
357
+ "model.layers.12.self_attn.o_proj": {
358
+ "bits": 6,
359
+ "group_size": 64
360
+ },
361
+ "model.layers.12.self_attn.q_norm": false,
362
+ "model.layers.12.self_attn.k_norm": false,
363
+ "model.layers.12.self_attn.rope": false,
364
+ "model.layers.12.mlp.gate_proj": true,
365
+ "model.layers.12.mlp.down_proj": true,
366
+ "model.layers.12.mlp.up_proj": {
367
+ "bits": 6,
368
+ "group_size": 64
369
+ },
370
+ "model.layers.12.input_layernorm": false,
371
+ "model.layers.12.post_attention_layernorm": false,
372
+ "model.layers.12.pre_feedforward_layernorm": false,
373
+ "model.layers.12.post_feedforward_layernorm": false,
374
+ "model.layers.13.self_attn.q_proj": true,
375
+ "model.layers.13.self_attn.k_proj": {
376
+ "bits": 6,
377
+ "group_size": 64
378
+ },
379
+ "model.layers.13.self_attn.v_proj": true,
380
+ "model.layers.13.self_attn.o_proj": {
381
+ "bits": 6,
382
+ "group_size": 64
383
+ },
384
+ "model.layers.13.self_attn.q_norm": false,
385
+ "model.layers.13.self_attn.k_norm": false,
386
+ "model.layers.13.self_attn.rope": false,
387
+ "model.layers.13.mlp.gate_proj": true,
388
+ "model.layers.13.mlp.down_proj": {
389
+ "bits": 6,
390
+ "group_size": 64
391
+ },
392
+ "model.layers.13.mlp.up_proj": true,
393
+ "model.layers.13.input_layernorm": false,
394
+ "model.layers.13.post_attention_layernorm": false,
395
+ "model.layers.13.pre_feedforward_layernorm": false,
396
+ "model.layers.13.post_feedforward_layernorm": false,
397
+ "model.layers.14.self_attn.q_proj": {
398
+ "bits": 6,
399
+ "group_size": 64
400
+ },
401
+ "model.layers.14.self_attn.k_proj": {
402
+ "bits": 6,
403
+ "group_size": 64
404
+ },
405
+ "model.layers.14.self_attn.v_proj": {
406
+ "bits": 6,
407
+ "group_size": 64
408
+ },
409
+ "model.layers.14.self_attn.o_proj": {
410
+ "bits": 6,
411
+ "group_size": 64
412
+ },
413
+ "model.layers.14.self_attn.q_norm": false,
414
+ "model.layers.14.self_attn.k_norm": false,
415
+ "model.layers.14.self_attn.rope": false,
416
+ "model.layers.14.mlp.gate_proj": {
417
+ "bits": 6,
418
+ "group_size": 64
419
+ },
420
+ "model.layers.14.mlp.down_proj": true,
421
+ "model.layers.14.mlp.up_proj": true,
422
+ "model.layers.14.input_layernorm": false,
423
+ "model.layers.14.post_attention_layernorm": false,
424
+ "model.layers.14.pre_feedforward_layernorm": false,
425
+ "model.layers.14.post_feedforward_layernorm": false,
426
+ "model.layers.15.self_attn.q_proj": {
427
+ "bits": 6,
428
+ "group_size": 64
429
+ },
430
+ "model.layers.15.self_attn.k_proj": {
431
+ "bits": 6,
432
+ "group_size": 64
433
+ },
434
+ "model.layers.15.self_attn.v_proj": true,
435
+ "model.layers.15.self_attn.o_proj": {
436
+ "bits": 6,
437
+ "group_size": 64
438
+ },
439
+ "model.layers.15.self_attn.q_norm": false,
440
+ "model.layers.15.self_attn.k_norm": false,
441
+ "model.layers.15.self_attn.rope": false,
442
+ "model.layers.15.mlp.gate_proj": {
443
+ "bits": 6,
444
+ "group_size": 64
445
+ },
446
+ "model.layers.15.mlp.down_proj": true,
447
+ "model.layers.15.mlp.up_proj": true,
448
+ "model.layers.15.input_layernorm": false,
449
+ "model.layers.15.post_attention_layernorm": false,
450
+ "model.layers.15.pre_feedforward_layernorm": false,
451
+ "model.layers.15.post_feedforward_layernorm": false,
452
+ "model.layers.16.self_attn.q_proj": {
453
+ "bits": 6,
454
+ "group_size": 64
455
+ },
456
+ "model.layers.16.self_attn.k_proj": {
457
+ "bits": 6,
458
+ "group_size": 64
459
+ },
460
+ "model.layers.16.self_attn.v_proj": {
461
+ "bits": 6,
462
+ "group_size": 64
463
+ },
464
+ "model.layers.16.self_attn.o_proj": {
465
+ "bits": 6,
466
+ "group_size": 64
467
+ },
468
+ "model.layers.16.self_attn.q_norm": false,
469
+ "model.layers.16.self_attn.k_norm": false,
470
+ "model.layers.16.self_attn.rope": false,
471
+ "model.layers.16.mlp.gate_proj": {
472
+ "bits": 6,
473
+ "group_size": 64
474
+ },
475
+ "model.layers.16.mlp.down_proj": {
476
+ "bits": 6,
477
+ "group_size": 64
478
+ },
479
+ "model.layers.16.mlp.up_proj": true,
480
+ "model.layers.16.input_layernorm": false,
481
+ "model.layers.16.post_attention_layernorm": false,
482
+ "model.layers.16.pre_feedforward_layernorm": false,
483
+ "model.layers.16.post_feedforward_layernorm": false,
484
+ "model.layers.17.self_attn.q_proj": {
485
+ "bits": 6,
486
+ "group_size": 64
487
+ },
488
+ "model.layers.17.self_attn.k_proj": {
489
+ "bits": 6,
490
+ "group_size": 64
491
+ },
492
+ "model.layers.17.self_attn.v_proj": true,
493
+ "model.layers.17.self_attn.o_proj": {
494
+ "bits": 6,
495
+ "group_size": 64
496
+ },
497
+ "model.layers.17.self_attn.q_norm": false,
498
+ "model.layers.17.self_attn.k_norm": false,
499
+ "model.layers.17.self_attn.rope": false,
500
+ "model.layers.17.mlp.gate_proj": {
501
+ "bits": 6,
502
+ "group_size": 64
503
+ },
504
+ "model.layers.17.mlp.down_proj": {
505
+ "bits": 6,
506
+ "group_size": 64
507
+ },
508
+ "model.layers.17.mlp.up_proj": true,
509
+ "model.layers.17.input_layernorm": false,
510
+ "model.layers.17.post_attention_layernorm": false,
511
+ "model.layers.17.pre_feedforward_layernorm": false,
512
+ "model.layers.17.post_feedforward_layernorm": false,
513
+ "model.layers.18.self_attn.q_proj": true,
514
+ "model.layers.18.self_attn.k_proj": {
515
+ "bits": 6,
516
+ "group_size": 64
517
+ },
518
+ "model.layers.18.self_attn.v_proj": true,
519
+ "model.layers.18.self_attn.o_proj": true,
520
+ "model.layers.18.self_attn.q_norm": false,
521
+ "model.layers.18.self_attn.k_norm": false,
522
+ "model.layers.18.self_attn.rope": false,
523
+ "model.layers.18.mlp.gate_proj": true,
524
+ "model.layers.18.mlp.down_proj": {
525
+ "bits": 6,
526
+ "group_size": 64
527
+ },
528
+ "model.layers.18.mlp.up_proj": {
529
+ "bits": 6,
530
+ "group_size": 64
531
+ },
532
+ "model.layers.18.input_layernorm": false,
533
+ "model.layers.18.post_attention_layernorm": false,
534
+ "model.layers.18.pre_feedforward_layernorm": false,
535
+ "model.layers.18.post_feedforward_layernorm": false,
536
+ "model.layers.19.self_attn.q_proj": true,
537
+ "model.layers.19.self_attn.k_proj": true,
538
+ "model.layers.19.self_attn.v_proj": {
539
+ "bits": 6,
540
+ "group_size": 64
541
+ },
542
+ "model.layers.19.self_attn.o_proj": {
543
+ "bits": 6,
544
+ "group_size": 64
545
+ },
546
+ "model.layers.19.self_attn.q_norm": false,
547
+ "model.layers.19.self_attn.k_norm": false,
548
+ "model.layers.19.self_attn.rope": false,
549
+ "model.layers.19.mlp.gate_proj": true,
550
+ "model.layers.19.mlp.down_proj": true,
551
+ "model.layers.19.mlp.up_proj": true,
552
+ "model.layers.19.input_layernorm": false,
553
+ "model.layers.19.post_attention_layernorm": false,
554
+ "model.layers.19.pre_feedforward_layernorm": false,
555
+ "model.layers.19.post_feedforward_layernorm": false,
556
+ "model.layers.20.self_attn.q_proj": true,
557
+ "model.layers.20.self_attn.k_proj": true,
558
+ "model.layers.20.self_attn.v_proj": true,
559
+ "model.layers.20.self_attn.o_proj": {
560
+ "bits": 6,
561
+ "group_size": 64
562
+ },
563
+ "model.layers.20.self_attn.q_norm": false,
564
+ "model.layers.20.self_attn.k_norm": false,
565
+ "model.layers.20.self_attn.rope": false,
566
+ "model.layers.20.mlp.gate_proj": true,
567
+ "model.layers.20.mlp.down_proj": true,
568
+ "model.layers.20.mlp.up_proj": true,
569
+ "model.layers.20.input_layernorm": false,
570
+ "model.layers.20.post_attention_layernorm": false,
571
+ "model.layers.20.pre_feedforward_layernorm": false,
572
+ "model.layers.20.post_feedforward_layernorm": false,
573
+ "model.layers.21.self_attn.q_proj": {
574
+ "bits": 6,
575
+ "group_size": 64
576
+ },
577
+ "model.layers.21.self_attn.k_proj": {
578
+ "bits": 6,
579
+ "group_size": 64
580
+ },
581
+ "model.layers.21.self_attn.v_proj": {
582
+ "bits": 6,
583
+ "group_size": 64
584
+ },
585
+ "model.layers.21.self_attn.o_proj": true,
586
+ "model.layers.21.self_attn.q_norm": false,
587
+ "model.layers.21.self_attn.k_norm": false,
588
+ "model.layers.21.self_attn.rope": false,
589
+ "model.layers.21.mlp.gate_proj": {
590
+ "bits": 6,
591
+ "group_size": 64
592
+ },
593
+ "model.layers.21.mlp.down_proj": true,
594
+ "model.layers.21.mlp.up_proj": true,
595
+ "model.layers.21.input_layernorm": false,
596
+ "model.layers.21.post_attention_layernorm": false,
597
+ "model.layers.21.pre_feedforward_layernorm": false,
598
+ "model.layers.21.post_feedforward_layernorm": false,
599
+ "model.layers.22.self_attn.q_proj": {
600
+ "bits": 6,
601
+ "group_size": 64
602
+ },
603
+ "model.layers.22.self_attn.k_proj": {
604
+ "bits": 6,
605
+ "group_size": 64
606
+ },
607
+ "model.layers.22.self_attn.v_proj": true,
608
+ "model.layers.22.self_attn.o_proj": {
609
+ "bits": 6,
610
+ "group_size": 64
611
+ },
612
+ "model.layers.22.self_attn.q_norm": false,
613
+ "model.layers.22.self_attn.k_norm": false,
614
+ "model.layers.22.self_attn.rope": false,
615
+ "model.layers.22.mlp.gate_proj": true,
616
+ "model.layers.22.mlp.down_proj": true,
617
+ "model.layers.22.mlp.up_proj": true,
618
+ "model.layers.22.input_layernorm": false,
619
+ "model.layers.22.post_attention_layernorm": false,
620
+ "model.layers.22.pre_feedforward_layernorm": false,
621
+ "model.layers.22.post_feedforward_layernorm": false,
622
+ "model.layers.23.self_attn.q_proj": true,
623
+ "model.layers.23.self_attn.k_proj": {
624
+ "bits": 6,
625
+ "group_size": 64
626
+ },
627
+ "model.layers.23.self_attn.v_proj": true,
628
+ "model.layers.23.self_attn.o_proj": {
629
+ "bits": 6,
630
+ "group_size": 64
631
+ },
632
+ "model.layers.23.self_attn.q_norm": false,
633
+ "model.layers.23.self_attn.k_norm": false,
634
+ "model.layers.23.self_attn.rope": false,
635
+ "model.layers.23.mlp.gate_proj": true,
636
+ "model.layers.23.mlp.down_proj": true,
637
+ "model.layers.23.mlp.up_proj": true,
638
+ "model.layers.23.input_layernorm": false,
639
+ "model.layers.23.post_attention_layernorm": false,
640
+ "model.layers.23.pre_feedforward_layernorm": false,
641
+ "model.layers.23.post_feedforward_layernorm": false,
642
+ "model.layers.24.self_attn.q_proj": {
643
+ "bits": 6,
644
+ "group_size": 64
645
+ },
646
+ "model.layers.24.self_attn.k_proj": {
647
+ "bits": 6,
648
+ "group_size": 64
649
+ },
650
+ "model.layers.24.self_attn.v_proj": true,
651
+ "model.layers.24.self_attn.o_proj": true,
652
+ "model.layers.24.self_attn.q_norm": false,
653
+ "model.layers.24.self_attn.k_norm": false,
654
+ "model.layers.24.self_attn.rope": false,
655
+ "model.layers.24.mlp.gate_proj": true,
656
+ "model.layers.24.mlp.down_proj": true,
657
+ "model.layers.24.mlp.up_proj": true,
658
+ "model.layers.24.input_layernorm": false,
659
+ "model.layers.24.post_attention_layernorm": false,
660
+ "model.layers.24.pre_feedforward_layernorm": false,
661
+ "model.layers.24.post_feedforward_layernorm": false,
662
+ "model.layers.25.self_attn.q_proj": {
663
+ "bits": 6,
664
+ "group_size": 64
665
+ },
666
+ "model.layers.25.self_attn.k_proj": {
667
+ "bits": 6,
668
+ "group_size": 64
669
+ },
670
+ "model.layers.25.self_attn.v_proj": true,
671
+ "model.layers.25.self_attn.o_proj": true,
672
+ "model.layers.25.self_attn.q_norm": false,
673
+ "model.layers.25.self_attn.k_norm": false,
674
+ "model.layers.25.self_attn.rope": false,
675
+ "model.layers.25.mlp.gate_proj": true,
676
+ "model.layers.25.mlp.down_proj": true,
677
+ "model.layers.25.mlp.up_proj": true,
678
+ "model.layers.25.input_layernorm": false,
679
+ "model.layers.25.post_attention_layernorm": false,
680
+ "model.layers.25.pre_feedforward_layernorm": false,
681
+ "model.layers.25.post_feedforward_layernorm": false,
682
+ "model.norm": false,
683
+ "lm_head": {
684
+ "bits": 6,
685
+ "group_size": 64
686
+ }
687
+ },
688
+ "quantization_config": {
689
+ "group_size": 64,
690
+ "bits": 2,
691
+ "model.embed_tokens": true,
692
+ "model.layers.0.self_attn.q_proj": {
693
+ "bits": 6,
694
+ "group_size": 64
695
+ },
696
+ "model.layers.0.self_attn.k_proj": true,
697
+ "model.layers.0.self_attn.v_proj": {
698
+ "bits": 6,
699
+ "group_size": 64
700
+ },
701
+ "model.layers.0.self_attn.o_proj": {
702
+ "bits": 6,
703
+ "group_size": 64
704
+ },
705
+ "model.layers.0.self_attn.q_norm": false,
706
+ "model.layers.0.self_attn.k_norm": false,
707
+ "model.layers.0.self_attn.rope": false,
708
+ "model.layers.0.mlp.gate_proj": {
709
+ "bits": 6,
710
+ "group_size": 64
711
+ },
712
+ "model.layers.0.mlp.down_proj": true,
713
+ "model.layers.0.mlp.up_proj": {
714
+ "bits": 6,
715
+ "group_size": 64
716
+ },
717
+ "model.layers.0.input_layernorm": false,
718
+ "model.layers.0.post_attention_layernorm": false,
719
+ "model.layers.0.pre_feedforward_layernorm": false,
720
+ "model.layers.0.post_feedforward_layernorm": false,
721
+ "model.layers.1.self_attn.q_proj": {
722
+ "bits": 6,
723
+ "group_size": 64
724
+ },
725
+ "model.layers.1.self_attn.k_proj": true,
726
+ "model.layers.1.self_attn.v_proj": {
727
+ "bits": 6,
728
+ "group_size": 64
729
+ },
730
+ "model.layers.1.self_attn.o_proj": {
731
+ "bits": 6,
732
+ "group_size": 64
733
+ },
734
+ "model.layers.1.self_attn.q_norm": false,
735
+ "model.layers.1.self_attn.k_norm": false,
736
+ "model.layers.1.self_attn.rope": false,
737
+ "model.layers.1.mlp.gate_proj": true,
738
+ "model.layers.1.mlp.down_proj": true,
739
+ "model.layers.1.mlp.up_proj": {
740
+ "bits": 6,
741
+ "group_size": 64
742
+ },
743
+ "model.layers.1.input_layernorm": false,
744
+ "model.layers.1.post_attention_layernorm": false,
745
+ "model.layers.1.pre_feedforward_layernorm": false,
746
+ "model.layers.1.post_feedforward_layernorm": false,
747
+ "model.layers.2.self_attn.q_proj": true,
748
+ "model.layers.2.self_attn.k_proj": true,
749
+ "model.layers.2.self_attn.v_proj": {
750
+ "bits": 6,
751
+ "group_size": 64
752
+ },
753
+ "model.layers.2.self_attn.o_proj": {
754
+ "bits": 6,
755
+ "group_size": 64
756
+ },
757
+ "model.layers.2.self_attn.q_norm": false,
758
+ "model.layers.2.self_attn.k_norm": false,
759
+ "model.layers.2.self_attn.rope": false,
760
+ "model.layers.2.mlp.gate_proj": true,
761
+ "model.layers.2.mlp.down_proj": true,
762
+ "model.layers.2.mlp.up_proj": {
763
+ "bits": 6,
764
+ "group_size": 64
765
+ },
766
+ "model.layers.2.input_layernorm": false,
767
+ "model.layers.2.post_attention_layernorm": false,
768
+ "model.layers.2.pre_feedforward_layernorm": false,
769
+ "model.layers.2.post_feedforward_layernorm": false,
770
+ "model.layers.3.self_attn.q_proj": true,
771
+ "model.layers.3.self_attn.k_proj": {
772
+ "bits": 6,
773
+ "group_size": 64
774
+ },
775
+ "model.layers.3.self_attn.v_proj": {
776
+ "bits": 6,
777
+ "group_size": 64
778
+ },
779
+ "model.layers.3.self_attn.o_proj": true,
780
+ "model.layers.3.self_attn.q_norm": false,
781
+ "model.layers.3.self_attn.k_norm": false,
782
+ "model.layers.3.self_attn.rope": false,
783
+ "model.layers.3.mlp.gate_proj": {
784
+ "bits": 6,
785
+ "group_size": 64
786
+ },
787
+ "model.layers.3.mlp.down_proj": true,
788
+ "model.layers.3.mlp.up_proj": true,
789
+ "model.layers.3.input_layernorm": false,
790
+ "model.layers.3.post_attention_layernorm": false,
791
+ "model.layers.3.pre_feedforward_layernorm": false,
792
+ "model.layers.3.post_feedforward_layernorm": false,
793
+ "model.layers.4.self_attn.q_proj": {
794
+ "bits": 6,
795
+ "group_size": 64
796
+ },
797
+ "model.layers.4.self_attn.k_proj": {
798
+ "bits": 6,
799
+ "group_size": 64
800
+ },
801
+ "model.layers.4.self_attn.v_proj": true,
802
+ "model.layers.4.self_attn.o_proj": true,
803
+ "model.layers.4.self_attn.q_norm": false,
804
+ "model.layers.4.self_attn.k_norm": false,
805
+ "model.layers.4.self_attn.rope": false,
806
+ "model.layers.4.mlp.gate_proj": {
807
+ "bits": 6,
808
+ "group_size": 64
809
+ },
810
+ "model.layers.4.mlp.down_proj": true,
811
+ "model.layers.4.mlp.up_proj": {
812
+ "bits": 6,
813
+ "group_size": 64
814
+ },
815
+ "model.layers.4.input_layernorm": false,
816
+ "model.layers.4.post_attention_layernorm": false,
817
+ "model.layers.4.pre_feedforward_layernorm": false,
818
+ "model.layers.4.post_feedforward_layernorm": false,
819
+ "model.layers.5.self_attn.q_proj": true,
820
+ "model.layers.5.self_attn.k_proj": {
821
+ "bits": 6,
822
+ "group_size": 64
823
+ },
824
+ "model.layers.5.self_attn.v_proj": {
825
+ "bits": 6,
826
+ "group_size": 64
827
+ },
828
+ "model.layers.5.self_attn.o_proj": {
829
+ "bits": 6,
830
+ "group_size": 64
831
+ },
832
+ "model.layers.5.self_attn.q_norm": false,
833
+ "model.layers.5.self_attn.k_norm": false,
834
+ "model.layers.5.self_attn.rope": false,
835
+ "model.layers.5.mlp.gate_proj": true,
836
+ "model.layers.5.mlp.down_proj": {
837
+ "bits": 6,
838
+ "group_size": 64
839
+ },
840
+ "model.layers.5.mlp.up_proj": true,
841
+ "model.layers.5.input_layernorm": false,
842
+ "model.layers.5.post_attention_layernorm": false,
843
+ "model.layers.5.pre_feedforward_layernorm": false,
844
+ "model.layers.5.post_feedforward_layernorm": false,
845
+ "model.layers.6.self_attn.q_proj": true,
846
+ "model.layers.6.self_attn.k_proj": {
847
+ "bits": 6,
848
+ "group_size": 64
849
+ },
850
+ "model.layers.6.self_attn.v_proj": {
851
+ "bits": 6,
852
+ "group_size": 64
853
+ },
854
+ "model.layers.6.self_attn.o_proj": {
855
+ "bits": 6,
856
+ "group_size": 64
857
+ },
858
+ "model.layers.6.self_attn.q_norm": false,
859
+ "model.layers.6.self_attn.k_norm": false,
860
+ "model.layers.6.self_attn.rope": false,
861
+ "model.layers.6.mlp.gate_proj": {
862
+ "bits": 6,
863
+ "group_size": 64
864
+ },
865
+ "model.layers.6.mlp.down_proj": true,
866
+ "model.layers.6.mlp.up_proj": {
867
+ "bits": 6,
868
+ "group_size": 64
869
+ },
870
+ "model.layers.6.input_layernorm": false,
871
+ "model.layers.6.post_attention_layernorm": false,
872
+ "model.layers.6.pre_feedforward_layernorm": false,
873
+ "model.layers.6.post_feedforward_layernorm": false,
874
+ "model.layers.7.self_attn.q_proj": true,
875
+ "model.layers.7.self_attn.k_proj": true,
876
+ "model.layers.7.self_attn.v_proj": {
877
+ "bits": 6,
878
+ "group_size": 64
879
+ },
880
+ "model.layers.7.self_attn.o_proj": {
881
+ "bits": 6,
882
+ "group_size": 64
883
+ },
884
+ "model.layers.7.self_attn.q_norm": false,
885
+ "model.layers.7.self_attn.k_norm": false,
886
+ "model.layers.7.self_attn.rope": false,
887
+ "model.layers.7.mlp.gate_proj": {
888
+ "bits": 6,
889
+ "group_size": 64
890
+ },
891
+ "model.layers.7.mlp.down_proj": {
892
+ "bits": 6,
893
+ "group_size": 64
894
+ },
895
+ "model.layers.7.mlp.up_proj": {
896
+ "bits": 6,
897
+ "group_size": 64
898
+ },
899
+ "model.layers.7.input_layernorm": false,
900
+ "model.layers.7.post_attention_layernorm": false,
901
+ "model.layers.7.pre_feedforward_layernorm": false,
902
+ "model.layers.7.post_feedforward_layernorm": false,
903
+ "model.layers.8.self_attn.q_proj": true,
904
+ "model.layers.8.self_attn.k_proj": {
905
+ "bits": 6,
906
+ "group_size": 64
907
+ },
908
+ "model.layers.8.self_attn.v_proj": {
909
+ "bits": 6,
910
+ "group_size": 64
911
+ },
912
+ "model.layers.8.self_attn.o_proj": {
913
+ "bits": 6,
914
+ "group_size": 64
915
+ },
916
+ "model.layers.8.self_attn.q_norm": false,
917
+ "model.layers.8.self_attn.k_norm": false,
918
+ "model.layers.8.self_attn.rope": false,
919
+ "model.layers.8.mlp.gate_proj": true,
920
+ "model.layers.8.mlp.down_proj": {
921
+ "bits": 6,
922
+ "group_size": 64
923
+ },
924
+ "model.layers.8.mlp.up_proj": {
925
+ "bits": 6,
926
+ "group_size": 64
927
+ },
928
+ "model.layers.8.input_layernorm": false,
929
+ "model.layers.8.post_attention_layernorm": false,
930
+ "model.layers.8.pre_feedforward_layernorm": false,
931
+ "model.layers.8.post_feedforward_layernorm": false,
932
+ "model.layers.9.self_attn.q_proj": true,
933
+ "model.layers.9.self_attn.k_proj": {
934
+ "bits": 6,
935
+ "group_size": 64
936
+ },
937
+ "model.layers.9.self_attn.v_proj": {
938
+ "bits": 6,
939
+ "group_size": 64
940
+ },
941
+ "model.layers.9.self_attn.o_proj": true,
942
+ "model.layers.9.self_attn.q_norm": false,
943
+ "model.layers.9.self_attn.k_norm": false,
944
+ "model.layers.9.self_attn.rope": false,
945
+ "model.layers.9.mlp.gate_proj": {
946
+ "bits": 6,
947
+ "group_size": 64
948
+ },
949
+ "model.layers.9.mlp.down_proj": {
950
+ "bits": 6,
951
+ "group_size": 64
952
+ },
953
+ "model.layers.9.mlp.up_proj": true,
954
+ "model.layers.9.input_layernorm": false,
955
+ "model.layers.9.post_attention_layernorm": false,
956
+ "model.layers.9.pre_feedforward_layernorm": false,
957
+ "model.layers.9.post_feedforward_layernorm": false,
958
+ "model.layers.10.self_attn.q_proj": {
959
+ "bits": 6,
960
+ "group_size": 64
961
+ },
962
+ "model.layers.10.self_attn.k_proj": {
963
+ "bits": 6,
964
+ "group_size": 64
965
+ },
966
+ "model.layers.10.self_attn.v_proj": true,
967
+ "model.layers.10.self_attn.o_proj": {
968
+ "bits": 6,
969
+ "group_size": 64
970
+ },
971
+ "model.layers.10.self_attn.q_norm": false,
972
+ "model.layers.10.self_attn.k_norm": false,
973
+ "model.layers.10.self_attn.rope": false,
974
+ "model.layers.10.mlp.gate_proj": {
975
+ "bits": 6,
976
+ "group_size": 64
977
+ },
978
+ "model.layers.10.mlp.down_proj": {
979
+ "bits": 6,
980
+ "group_size": 64
981
+ },
982
+ "model.layers.10.mlp.up_proj": true,
983
+ "model.layers.10.input_layernorm": false,
984
+ "model.layers.10.post_attention_layernorm": false,
985
+ "model.layers.10.pre_feedforward_layernorm": false,
986
+ "model.layers.10.post_feedforward_layernorm": false,
987
+ "model.layers.11.self_attn.q_proj": {
988
+ "bits": 6,
989
+ "group_size": 64
990
+ },
991
+ "model.layers.11.self_attn.k_proj": true,
992
+ "model.layers.11.self_attn.v_proj": {
993
+ "bits": 6,
994
+ "group_size": 64
995
+ },
996
+ "model.layers.11.self_attn.o_proj": {
997
+ "bits": 6,
998
+ "group_size": 64
999
+ },
1000
+ "model.layers.11.self_attn.q_norm": false,
1001
+ "model.layers.11.self_attn.k_norm": false,
1002
+ "model.layers.11.self_attn.rope": false,
1003
+ "model.layers.11.mlp.gate_proj": {
1004
+ "bits": 6,
1005
+ "group_size": 64
1006
+ },
1007
+ "model.layers.11.mlp.down_proj": true,
1008
+ "model.layers.11.mlp.up_proj": true,
1009
+ "model.layers.11.input_layernorm": false,
1010
+ "model.layers.11.post_attention_layernorm": false,
1011
+ "model.layers.11.pre_feedforward_layernorm": false,
1012
+ "model.layers.11.post_feedforward_layernorm": false,
1013
+ "model.layers.12.self_attn.q_proj": true,
1014
+ "model.layers.12.self_attn.k_proj": {
1015
+ "bits": 6,
1016
+ "group_size": 64
1017
+ },
1018
+ "model.layers.12.self_attn.v_proj": true,
1019
+ "model.layers.12.self_attn.o_proj": {
1020
+ "bits": 6,
1021
+ "group_size": 64
1022
+ },
1023
+ "model.layers.12.self_attn.q_norm": false,
1024
+ "model.layers.12.self_attn.k_norm": false,
1025
+ "model.layers.12.self_attn.rope": false,
1026
+ "model.layers.12.mlp.gate_proj": true,
1027
+ "model.layers.12.mlp.down_proj": true,
1028
+ "model.layers.12.mlp.up_proj": {
1029
+ "bits": 6,
1030
+ "group_size": 64
1031
+ },
1032
+ "model.layers.12.input_layernorm": false,
1033
+ "model.layers.12.post_attention_layernorm": false,
1034
+ "model.layers.12.pre_feedforward_layernorm": false,
1035
+ "model.layers.12.post_feedforward_layernorm": false,
1036
+ "model.layers.13.self_attn.q_proj": true,
1037
+ "model.layers.13.self_attn.k_proj": {
1038
+ "bits": 6,
1039
+ "group_size": 64
1040
+ },
1041
+ "model.layers.13.self_attn.v_proj": true,
1042
+ "model.layers.13.self_attn.o_proj": {
1043
+ "bits": 6,
1044
+ "group_size": 64
1045
+ },
1046
+ "model.layers.13.self_attn.q_norm": false,
1047
+ "model.layers.13.self_attn.k_norm": false,
1048
+ "model.layers.13.self_attn.rope": false,
1049
+ "model.layers.13.mlp.gate_proj": true,
1050
+ "model.layers.13.mlp.down_proj": {
1051
+ "bits": 6,
1052
+ "group_size": 64
1053
+ },
1054
+ "model.layers.13.mlp.up_proj": true,
1055
+ "model.layers.13.input_layernorm": false,
1056
+ "model.layers.13.post_attention_layernorm": false,
1057
+ "model.layers.13.pre_feedforward_layernorm": false,
1058
+ "model.layers.13.post_feedforward_layernorm": false,
1059
+ "model.layers.14.self_attn.q_proj": {
1060
+ "bits": 6,
1061
+ "group_size": 64
1062
+ },
1063
+ "model.layers.14.self_attn.k_proj": {
1064
+ "bits": 6,
1065
+ "group_size": 64
1066
+ },
1067
+ "model.layers.14.self_attn.v_proj": {
1068
+ "bits": 6,
1069
+ "group_size": 64
1070
+ },
1071
+ "model.layers.14.self_attn.o_proj": {
1072
+ "bits": 6,
1073
+ "group_size": 64
1074
+ },
1075
+ "model.layers.14.self_attn.q_norm": false,
1076
+ "model.layers.14.self_attn.k_norm": false,
1077
+ "model.layers.14.self_attn.rope": false,
1078
+ "model.layers.14.mlp.gate_proj": {
1079
+ "bits": 6,
1080
+ "group_size": 64
1081
+ },
1082
+ "model.layers.14.mlp.down_proj": true,
1083
+ "model.layers.14.mlp.up_proj": true,
1084
+ "model.layers.14.input_layernorm": false,
1085
+ "model.layers.14.post_attention_layernorm": false,
1086
+ "model.layers.14.pre_feedforward_layernorm": false,
1087
+ "model.layers.14.post_feedforward_layernorm": false,
1088
+ "model.layers.15.self_attn.q_proj": {
1089
+ "bits": 6,
1090
+ "group_size": 64
1091
+ },
1092
+ "model.layers.15.self_attn.k_proj": {
1093
+ "bits": 6,
1094
+ "group_size": 64
1095
+ },
1096
+ "model.layers.15.self_attn.v_proj": true,
1097
+ "model.layers.15.self_attn.o_proj": {
1098
+ "bits": 6,
1099
+ "group_size": 64
1100
+ },
1101
+ "model.layers.15.self_attn.q_norm": false,
1102
+ "model.layers.15.self_attn.k_norm": false,
1103
+ "model.layers.15.self_attn.rope": false,
1104
+ "model.layers.15.mlp.gate_proj": {
1105
+ "bits": 6,
1106
+ "group_size": 64
1107
+ },
1108
+ "model.layers.15.mlp.down_proj": true,
1109
+ "model.layers.15.mlp.up_proj": true,
1110
+ "model.layers.15.input_layernorm": false,
1111
+ "model.layers.15.post_attention_layernorm": false,
1112
+ "model.layers.15.pre_feedforward_layernorm": false,
1113
+ "model.layers.15.post_feedforward_layernorm": false,
1114
+ "model.layers.16.self_attn.q_proj": {
1115
+ "bits": 6,
1116
+ "group_size": 64
1117
+ },
1118
+ "model.layers.16.self_attn.k_proj": {
1119
+ "bits": 6,
1120
+ "group_size": 64
1121
+ },
1122
+ "model.layers.16.self_attn.v_proj": {
1123
+ "bits": 6,
1124
+ "group_size": 64
1125
+ },
1126
+ "model.layers.16.self_attn.o_proj": {
1127
+ "bits": 6,
1128
+ "group_size": 64
1129
+ },
1130
+ "model.layers.16.self_attn.q_norm": false,
1131
+ "model.layers.16.self_attn.k_norm": false,
1132
+ "model.layers.16.self_attn.rope": false,
1133
+ "model.layers.16.mlp.gate_proj": {
1134
+ "bits": 6,
1135
+ "group_size": 64
1136
+ },
1137
+ "model.layers.16.mlp.down_proj": {
1138
+ "bits": 6,
1139
+ "group_size": 64
1140
+ },
1141
+ "model.layers.16.mlp.up_proj": true,
1142
+ "model.layers.16.input_layernorm": false,
1143
+ "model.layers.16.post_attention_layernorm": false,
1144
+ "model.layers.16.pre_feedforward_layernorm": false,
1145
+ "model.layers.16.post_feedforward_layernorm": false,
1146
+ "model.layers.17.self_attn.q_proj": {
1147
+ "bits": 6,
1148
+ "group_size": 64
1149
+ },
1150
+ "model.layers.17.self_attn.k_proj": {
1151
+ "bits": 6,
1152
+ "group_size": 64
1153
+ },
1154
+ "model.layers.17.self_attn.v_proj": true,
1155
+ "model.layers.17.self_attn.o_proj": {
1156
+ "bits": 6,
1157
+ "group_size": 64
1158
+ },
1159
+ "model.layers.17.self_attn.q_norm": false,
1160
+ "model.layers.17.self_attn.k_norm": false,
1161
+ "model.layers.17.self_attn.rope": false,
1162
+ "model.layers.17.mlp.gate_proj": {
1163
+ "bits": 6,
1164
+ "group_size": 64
1165
+ },
1166
+ "model.layers.17.mlp.down_proj": {
1167
+ "bits": 6,
1168
+ "group_size": 64
1169
+ },
1170
+ "model.layers.17.mlp.up_proj": true,
1171
+ "model.layers.17.input_layernorm": false,
1172
+ "model.layers.17.post_attention_layernorm": false,
1173
+ "model.layers.17.pre_feedforward_layernorm": false,
1174
+ "model.layers.17.post_feedforward_layernorm": false,
1175
+ "model.layers.18.self_attn.q_proj": true,
1176
+ "model.layers.18.self_attn.k_proj": {
1177
+ "bits": 6,
1178
+ "group_size": 64
1179
+ },
1180
+ "model.layers.18.self_attn.v_proj": true,
1181
+ "model.layers.18.self_attn.o_proj": true,
1182
+ "model.layers.18.self_attn.q_norm": false,
1183
+ "model.layers.18.self_attn.k_norm": false,
1184
+ "model.layers.18.self_attn.rope": false,
1185
+ "model.layers.18.mlp.gate_proj": true,
1186
+ "model.layers.18.mlp.down_proj": {
1187
+ "bits": 6,
1188
+ "group_size": 64
1189
+ },
1190
+ "model.layers.18.mlp.up_proj": {
1191
+ "bits": 6,
1192
+ "group_size": 64
1193
+ },
1194
+ "model.layers.18.input_layernorm": false,
1195
+ "model.layers.18.post_attention_layernorm": false,
1196
+ "model.layers.18.pre_feedforward_layernorm": false,
1197
+ "model.layers.18.post_feedforward_layernorm": false,
1198
+ "model.layers.19.self_attn.q_proj": true,
1199
+ "model.layers.19.self_attn.k_proj": true,
1200
+ "model.layers.19.self_attn.v_proj": {
1201
+ "bits": 6,
1202
+ "group_size": 64
1203
+ },
1204
+ "model.layers.19.self_attn.o_proj": {
1205
+ "bits": 6,
1206
+ "group_size": 64
1207
+ },
1208
+ "model.layers.19.self_attn.q_norm": false,
1209
+ "model.layers.19.self_attn.k_norm": false,
1210
+ "model.layers.19.self_attn.rope": false,
1211
+ "model.layers.19.mlp.gate_proj": true,
1212
+ "model.layers.19.mlp.down_proj": true,
1213
+ "model.layers.19.mlp.up_proj": true,
1214
+ "model.layers.19.input_layernorm": false,
1215
+ "model.layers.19.post_attention_layernorm": false,
1216
+ "model.layers.19.pre_feedforward_layernorm": false,
1217
+ "model.layers.19.post_feedforward_layernorm": false,
1218
+ "model.layers.20.self_attn.q_proj": true,
1219
+ "model.layers.20.self_attn.k_proj": true,
1220
+ "model.layers.20.self_attn.v_proj": true,
1221
+ "model.layers.20.self_attn.o_proj": {
1222
+ "bits": 6,
1223
+ "group_size": 64
1224
+ },
1225
+ "model.layers.20.self_attn.q_norm": false,
1226
+ "model.layers.20.self_attn.k_norm": false,
1227
+ "model.layers.20.self_attn.rope": false,
1228
+ "model.layers.20.mlp.gate_proj": true,
1229
+ "model.layers.20.mlp.down_proj": true,
1230
+ "model.layers.20.mlp.up_proj": true,
1231
+ "model.layers.20.input_layernorm": false,
1232
+ "model.layers.20.post_attention_layernorm": false,
1233
+ "model.layers.20.pre_feedforward_layernorm": false,
1234
+ "model.layers.20.post_feedforward_layernorm": false,
1235
+ "model.layers.21.self_attn.q_proj": {
1236
+ "bits": 6,
1237
+ "group_size": 64
1238
+ },
1239
+ "model.layers.21.self_attn.k_proj": {
1240
+ "bits": 6,
1241
+ "group_size": 64
1242
+ },
1243
+ "model.layers.21.self_attn.v_proj": {
1244
+ "bits": 6,
1245
+ "group_size": 64
1246
+ },
1247
+ "model.layers.21.self_attn.o_proj": true,
1248
+ "model.layers.21.self_attn.q_norm": false,
1249
+ "model.layers.21.self_attn.k_norm": false,
1250
+ "model.layers.21.self_attn.rope": false,
1251
+ "model.layers.21.mlp.gate_proj": {
1252
+ "bits": 6,
1253
+ "group_size": 64
1254
+ },
1255
+ "model.layers.21.mlp.down_proj": true,
1256
+ "model.layers.21.mlp.up_proj": true,
1257
+ "model.layers.21.input_layernorm": false,
1258
+ "model.layers.21.post_attention_layernorm": false,
1259
+ "model.layers.21.pre_feedforward_layernorm": false,
1260
+ "model.layers.21.post_feedforward_layernorm": false,
1261
+ "model.layers.22.self_attn.q_proj": {
1262
+ "bits": 6,
1263
+ "group_size": 64
1264
+ },
1265
+ "model.layers.22.self_attn.k_proj": {
1266
+ "bits": 6,
1267
+ "group_size": 64
1268
+ },
1269
+ "model.layers.22.self_attn.v_proj": true,
1270
+ "model.layers.22.self_attn.o_proj": {
1271
+ "bits": 6,
1272
+ "group_size": 64
1273
+ },
1274
+ "model.layers.22.self_attn.q_norm": false,
1275
+ "model.layers.22.self_attn.k_norm": false,
1276
+ "model.layers.22.self_attn.rope": false,
1277
+ "model.layers.22.mlp.gate_proj": true,
1278
+ "model.layers.22.mlp.down_proj": true,
1279
+ "model.layers.22.mlp.up_proj": true,
1280
+ "model.layers.22.input_layernorm": false,
1281
+ "model.layers.22.post_attention_layernorm": false,
1282
+ "model.layers.22.pre_feedforward_layernorm": false,
1283
+ "model.layers.22.post_feedforward_layernorm": false,
1284
+ "model.layers.23.self_attn.q_proj": true,
1285
+ "model.layers.23.self_attn.k_proj": {
1286
+ "bits": 6,
1287
+ "group_size": 64
1288
+ },
1289
+ "model.layers.23.self_attn.v_proj": true,
1290
+ "model.layers.23.self_attn.o_proj": {
1291
+ "bits": 6,
1292
+ "group_size": 64
1293
+ },
1294
+ "model.layers.23.self_attn.q_norm": false,
1295
+ "model.layers.23.self_attn.k_norm": false,
1296
+ "model.layers.23.self_attn.rope": false,
1297
+ "model.layers.23.mlp.gate_proj": true,
1298
+ "model.layers.23.mlp.down_proj": true,
1299
+ "model.layers.23.mlp.up_proj": true,
1300
+ "model.layers.23.input_layernorm": false,
1301
+ "model.layers.23.post_attention_layernorm": false,
1302
+ "model.layers.23.pre_feedforward_layernorm": false,
1303
+ "model.layers.23.post_feedforward_layernorm": false,
1304
+ "model.layers.24.self_attn.q_proj": {
1305
+ "bits": 6,
1306
+ "group_size": 64
1307
+ },
1308
+ "model.layers.24.self_attn.k_proj": {
1309
+ "bits": 6,
1310
+ "group_size": 64
1311
+ },
1312
+ "model.layers.24.self_attn.v_proj": true,
1313
+ "model.layers.24.self_attn.o_proj": true,
1314
+ "model.layers.24.self_attn.q_norm": false,
1315
+ "model.layers.24.self_attn.k_norm": false,
1316
+ "model.layers.24.self_attn.rope": false,
1317
+ "model.layers.24.mlp.gate_proj": true,
1318
+ "model.layers.24.mlp.down_proj": true,
1319
+ "model.layers.24.mlp.up_proj": true,
1320
+ "model.layers.24.input_layernorm": false,
1321
+ "model.layers.24.post_attention_layernorm": false,
1322
+ "model.layers.24.pre_feedforward_layernorm": false,
1323
+ "model.layers.24.post_feedforward_layernorm": false,
1324
+ "model.layers.25.self_attn.q_proj": {
1325
+ "bits": 6,
1326
+ "group_size": 64
1327
+ },
1328
+ "model.layers.25.self_attn.k_proj": {
1329
+ "bits": 6,
1330
+ "group_size": 64
1331
+ },
1332
+ "model.layers.25.self_attn.v_proj": true,
1333
+ "model.layers.25.self_attn.o_proj": true,
1334
+ "model.layers.25.self_attn.q_norm": false,
1335
+ "model.layers.25.self_attn.k_norm": false,
1336
+ "model.layers.25.self_attn.rope": false,
1337
+ "model.layers.25.mlp.gate_proj": true,
1338
+ "model.layers.25.mlp.down_proj": true,
1339
+ "model.layers.25.mlp.up_proj": true,
1340
+ "model.layers.25.input_layernorm": false,
1341
+ "model.layers.25.post_attention_layernorm": false,
1342
+ "model.layers.25.pre_feedforward_layernorm": false,
1343
+ "model.layers.25.post_feedforward_layernorm": false,
1344
+ "model.norm": false,
1345
+ "lm_head": {
1346
+ "bits": 6,
1347
+ "group_size": 64
1348
+ }
1349
+ },
1350
+ "query_pre_attn_scalar": 256,
1351
+ "rms_norm_eps": 1e-06,
1352
+ "rope_local_base_freq": 10000,
1353
+ "rope_scaling": null,
1354
+ "rope_theta": 1000000,
1355
+ "sliding_window": 512,
1356
+ "sliding_window_pattern": 6,
1357
+ "torch_dtype": "bfloat16",
1358
+ "transformers_version": "4.50.0.dev0",
1359
+ "use_cache": true,
1360
+ "vocab_size": 262144
1361
+ }