pcuenq HF Staff commited on
Commit
ca0456d
·
verified ·
1 Parent(s): 8ede312

Upload config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.json +1625 -0
config.json ADDED
@@ -0,0 +1,1625 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Gemma3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "attn_logit_softcapping": null,
8
+ "bos_token_id": 2,
9
+ "cache_implementation": "hybrid",
10
+ "eos_token_id": [
11
+ 1,
12
+ 106
13
+ ],
14
+ "final_logit_softcapping": null,
15
+ "head_dim": 256,
16
+ "hidden_activation": "gelu_pytorch_tanh",
17
+ "hidden_size": 1152,
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 6912,
20
+ "max_position_embeddings": 32768,
21
+ "model_type": "gemma3_text",
22
+ "num_attention_heads": 4,
23
+ "num_hidden_layers": 26,
24
+ "num_key_value_heads": 1,
25
+ "pad_token_id": 0,
26
+ "quantization": {
27
+ "group_size": 64,
28
+ "bits": 2,
29
+ "model.embed_tokens": {
30
+ "bits": 6,
31
+ "group_size": 64
32
+ },
33
+ "model.layers.0.self_attn.q_proj": {
34
+ "bits": 6,
35
+ "group_size": 64
36
+ },
37
+ "model.layers.0.self_attn.k_proj": true,
38
+ "model.layers.0.self_attn.v_proj": {
39
+ "bits": 6,
40
+ "group_size": 64
41
+ },
42
+ "model.layers.0.self_attn.o_proj": {
43
+ "bits": 6,
44
+ "group_size": 64
45
+ },
46
+ "model.layers.0.self_attn.q_norm": false,
47
+ "model.layers.0.self_attn.k_norm": false,
48
+ "model.layers.0.self_attn.rope": false,
49
+ "model.layers.0.mlp.gate_proj": {
50
+ "bits": 6,
51
+ "group_size": 64
52
+ },
53
+ "model.layers.0.mlp.down_proj": true,
54
+ "model.layers.0.mlp.up_proj": {
55
+ "bits": 6,
56
+ "group_size": 64
57
+ },
58
+ "model.layers.0.input_layernorm": false,
59
+ "model.layers.0.post_attention_layernorm": false,
60
+ "model.layers.0.pre_feedforward_layernorm": false,
61
+ "model.layers.0.post_feedforward_layernorm": false,
62
+ "model.layers.1.self_attn.q_proj": {
63
+ "bits": 6,
64
+ "group_size": 64
65
+ },
66
+ "model.layers.1.self_attn.k_proj": true,
67
+ "model.layers.1.self_attn.v_proj": {
68
+ "bits": 6,
69
+ "group_size": 64
70
+ },
71
+ "model.layers.1.self_attn.o_proj": {
72
+ "bits": 6,
73
+ "group_size": 64
74
+ },
75
+ "model.layers.1.self_attn.q_norm": false,
76
+ "model.layers.1.self_attn.k_norm": false,
77
+ "model.layers.1.self_attn.rope": false,
78
+ "model.layers.1.mlp.gate_proj": true,
79
+ "model.layers.1.mlp.down_proj": true,
80
+ "model.layers.1.mlp.up_proj": {
81
+ "bits": 6,
82
+ "group_size": 64
83
+ },
84
+ "model.layers.1.input_layernorm": false,
85
+ "model.layers.1.post_attention_layernorm": false,
86
+ "model.layers.1.pre_feedforward_layernorm": false,
87
+ "model.layers.1.post_feedforward_layernorm": false,
88
+ "model.layers.2.self_attn.q_proj": true,
89
+ "model.layers.2.self_attn.k_proj": true,
90
+ "model.layers.2.self_attn.v_proj": {
91
+ "bits": 6,
92
+ "group_size": 64
93
+ },
94
+ "model.layers.2.self_attn.o_proj": true,
95
+ "model.layers.2.self_attn.q_norm": false,
96
+ "model.layers.2.self_attn.k_norm": false,
97
+ "model.layers.2.self_attn.rope": false,
98
+ "model.layers.2.mlp.gate_proj": true,
99
+ "model.layers.2.mlp.down_proj": {
100
+ "bits": 6,
101
+ "group_size": 64
102
+ },
103
+ "model.layers.2.mlp.up_proj": {
104
+ "bits": 6,
105
+ "group_size": 64
106
+ },
107
+ "model.layers.2.input_layernorm": false,
108
+ "model.layers.2.post_attention_layernorm": false,
109
+ "model.layers.2.pre_feedforward_layernorm": false,
110
+ "model.layers.2.post_feedforward_layernorm": false,
111
+ "model.layers.3.self_attn.q_proj": {
112
+ "bits": 6,
113
+ "group_size": 64
114
+ },
115
+ "model.layers.3.self_attn.k_proj": {
116
+ "bits": 6,
117
+ "group_size": 64
118
+ },
119
+ "model.layers.3.self_attn.v_proj": {
120
+ "bits": 6,
121
+ "group_size": 64
122
+ },
123
+ "model.layers.3.self_attn.o_proj": true,
124
+ "model.layers.3.self_attn.q_norm": false,
125
+ "model.layers.3.self_attn.k_norm": false,
126
+ "model.layers.3.self_attn.rope": false,
127
+ "model.layers.3.mlp.gate_proj": {
128
+ "bits": 6,
129
+ "group_size": 64
130
+ },
131
+ "model.layers.3.mlp.down_proj": {
132
+ "bits": 6,
133
+ "group_size": 64
134
+ },
135
+ "model.layers.3.mlp.up_proj": true,
136
+ "model.layers.3.input_layernorm": false,
137
+ "model.layers.3.post_attention_layernorm": false,
138
+ "model.layers.3.pre_feedforward_layernorm": false,
139
+ "model.layers.3.post_feedforward_layernorm": false,
140
+ "model.layers.4.self_attn.q_proj": {
141
+ "bits": 6,
142
+ "group_size": 64
143
+ },
144
+ "model.layers.4.self_attn.k_proj": {
145
+ "bits": 6,
146
+ "group_size": 64
147
+ },
148
+ "model.layers.4.self_attn.v_proj": true,
149
+ "model.layers.4.self_attn.o_proj": true,
150
+ "model.layers.4.self_attn.q_norm": false,
151
+ "model.layers.4.self_attn.k_norm": false,
152
+ "model.layers.4.self_attn.rope": false,
153
+ "model.layers.4.mlp.gate_proj": {
154
+ "bits": 6,
155
+ "group_size": 64
156
+ },
157
+ "model.layers.4.mlp.down_proj": true,
158
+ "model.layers.4.mlp.up_proj": {
159
+ "bits": 6,
160
+ "group_size": 64
161
+ },
162
+ "model.layers.4.input_layernorm": false,
163
+ "model.layers.4.post_attention_layernorm": false,
164
+ "model.layers.4.pre_feedforward_layernorm": false,
165
+ "model.layers.4.post_feedforward_layernorm": false,
166
+ "model.layers.5.self_attn.q_proj": {
167
+ "bits": 6,
168
+ "group_size": 64
169
+ },
170
+ "model.layers.5.self_attn.k_proj": {
171
+ "bits": 6,
172
+ "group_size": 64
173
+ },
174
+ "model.layers.5.self_attn.v_proj": {
175
+ "bits": 6,
176
+ "group_size": 64
177
+ },
178
+ "model.layers.5.self_attn.o_proj": {
179
+ "bits": 6,
180
+ "group_size": 64
181
+ },
182
+ "model.layers.5.self_attn.q_norm": false,
183
+ "model.layers.5.self_attn.k_norm": false,
184
+ "model.layers.5.self_attn.rope": false,
185
+ "model.layers.5.mlp.gate_proj": true,
186
+ "model.layers.5.mlp.down_proj": {
187
+ "bits": 6,
188
+ "group_size": 64
189
+ },
190
+ "model.layers.5.mlp.up_proj": true,
191
+ "model.layers.5.input_layernorm": false,
192
+ "model.layers.5.post_attention_layernorm": false,
193
+ "model.layers.5.pre_feedforward_layernorm": false,
194
+ "model.layers.5.post_feedforward_layernorm": false,
195
+ "model.layers.6.self_attn.q_proj": true,
196
+ "model.layers.6.self_attn.k_proj": {
197
+ "bits": 6,
198
+ "group_size": 64
199
+ },
200
+ "model.layers.6.self_attn.v_proj": {
201
+ "bits": 6,
202
+ "group_size": 64
203
+ },
204
+ "model.layers.6.self_attn.o_proj": {
205
+ "bits": 6,
206
+ "group_size": 64
207
+ },
208
+ "model.layers.6.self_attn.q_norm": false,
209
+ "model.layers.6.self_attn.k_norm": false,
210
+ "model.layers.6.self_attn.rope": false,
211
+ "model.layers.6.mlp.gate_proj": {
212
+ "bits": 6,
213
+ "group_size": 64
214
+ },
215
+ "model.layers.6.mlp.down_proj": true,
216
+ "model.layers.6.mlp.up_proj": {
217
+ "bits": 6,
218
+ "group_size": 64
219
+ },
220
+ "model.layers.6.input_layernorm": false,
221
+ "model.layers.6.post_attention_layernorm": false,
222
+ "model.layers.6.pre_feedforward_layernorm": false,
223
+ "model.layers.6.post_feedforward_layernorm": false,
224
+ "model.layers.7.self_attn.q_proj": true,
225
+ "model.layers.7.self_attn.k_proj": {
226
+ "bits": 6,
227
+ "group_size": 64
228
+ },
229
+ "model.layers.7.self_attn.v_proj": true,
230
+ "model.layers.7.self_attn.o_proj": {
231
+ "bits": 6,
232
+ "group_size": 64
233
+ },
234
+ "model.layers.7.self_attn.q_norm": false,
235
+ "model.layers.7.self_attn.k_norm": false,
236
+ "model.layers.7.self_attn.rope": false,
237
+ "model.layers.7.mlp.gate_proj": {
238
+ "bits": 6,
239
+ "group_size": 64
240
+ },
241
+ "model.layers.7.mlp.down_proj": {
242
+ "bits": 6,
243
+ "group_size": 64
244
+ },
245
+ "model.layers.7.mlp.up_proj": {
246
+ "bits": 6,
247
+ "group_size": 64
248
+ },
249
+ "model.layers.7.input_layernorm": false,
250
+ "model.layers.7.post_attention_layernorm": false,
251
+ "model.layers.7.pre_feedforward_layernorm": false,
252
+ "model.layers.7.post_feedforward_layernorm": false,
253
+ "model.layers.8.self_attn.q_proj": true,
254
+ "model.layers.8.self_attn.k_proj": {
255
+ "bits": 6,
256
+ "group_size": 64
257
+ },
258
+ "model.layers.8.self_attn.v_proj": {
259
+ "bits": 6,
260
+ "group_size": 64
261
+ },
262
+ "model.layers.8.self_attn.o_proj": {
263
+ "bits": 6,
264
+ "group_size": 64
265
+ },
266
+ "model.layers.8.self_attn.q_norm": false,
267
+ "model.layers.8.self_attn.k_norm": false,
268
+ "model.layers.8.self_attn.rope": false,
269
+ "model.layers.8.mlp.gate_proj": {
270
+ "bits": 6,
271
+ "group_size": 64
272
+ },
273
+ "model.layers.8.mlp.down_proj": {
274
+ "bits": 6,
275
+ "group_size": 64
276
+ },
277
+ "model.layers.8.mlp.up_proj": {
278
+ "bits": 6,
279
+ "group_size": 64
280
+ },
281
+ "model.layers.8.input_layernorm": false,
282
+ "model.layers.8.post_attention_layernorm": false,
283
+ "model.layers.8.pre_feedforward_layernorm": false,
284
+ "model.layers.8.post_feedforward_layernorm": false,
285
+ "model.layers.9.self_attn.q_proj": {
286
+ "bits": 6,
287
+ "group_size": 64
288
+ },
289
+ "model.layers.9.self_attn.k_proj": {
290
+ "bits": 6,
291
+ "group_size": 64
292
+ },
293
+ "model.layers.9.self_attn.v_proj": {
294
+ "bits": 6,
295
+ "group_size": 64
296
+ },
297
+ "model.layers.9.self_attn.o_proj": true,
298
+ "model.layers.9.self_attn.q_norm": false,
299
+ "model.layers.9.self_attn.k_norm": false,
300
+ "model.layers.9.self_attn.rope": false,
301
+ "model.layers.9.mlp.gate_proj": {
302
+ "bits": 6,
303
+ "group_size": 64
304
+ },
305
+ "model.layers.9.mlp.down_proj": {
306
+ "bits": 6,
307
+ "group_size": 64
308
+ },
309
+ "model.layers.9.mlp.up_proj": true,
310
+ "model.layers.9.input_layernorm": false,
311
+ "model.layers.9.post_attention_layernorm": false,
312
+ "model.layers.9.pre_feedforward_layernorm": false,
313
+ "model.layers.9.post_feedforward_layernorm": false,
314
+ "model.layers.10.self_attn.q_proj": {
315
+ "bits": 6,
316
+ "group_size": 64
317
+ },
318
+ "model.layers.10.self_attn.k_proj": {
319
+ "bits": 6,
320
+ "group_size": 64
321
+ },
322
+ "model.layers.10.self_attn.v_proj": true,
323
+ "model.layers.10.self_attn.o_proj": {
324
+ "bits": 6,
325
+ "group_size": 64
326
+ },
327
+ "model.layers.10.self_attn.q_norm": false,
328
+ "model.layers.10.self_attn.k_norm": false,
329
+ "model.layers.10.self_attn.rope": false,
330
+ "model.layers.10.mlp.gate_proj": {
331
+ "bits": 6,
332
+ "group_size": 64
333
+ },
334
+ "model.layers.10.mlp.down_proj": {
335
+ "bits": 6,
336
+ "group_size": 64
337
+ },
338
+ "model.layers.10.mlp.up_proj": {
339
+ "bits": 6,
340
+ "group_size": 64
341
+ },
342
+ "model.layers.10.input_layernorm": false,
343
+ "model.layers.10.post_attention_layernorm": false,
344
+ "model.layers.10.pre_feedforward_layernorm": false,
345
+ "model.layers.10.post_feedforward_layernorm": false,
346
+ "model.layers.11.self_attn.q_proj": {
347
+ "bits": 6,
348
+ "group_size": 64
349
+ },
350
+ "model.layers.11.self_attn.k_proj": true,
351
+ "model.layers.11.self_attn.v_proj": {
352
+ "bits": 6,
353
+ "group_size": 64
354
+ },
355
+ "model.layers.11.self_attn.o_proj": {
356
+ "bits": 6,
357
+ "group_size": 64
358
+ },
359
+ "model.layers.11.self_attn.q_norm": false,
360
+ "model.layers.11.self_attn.k_norm": false,
361
+ "model.layers.11.self_attn.rope": false,
362
+ "model.layers.11.mlp.gate_proj": {
363
+ "bits": 6,
364
+ "group_size": 64
365
+ },
366
+ "model.layers.11.mlp.down_proj": {
367
+ "bits": 6,
368
+ "group_size": 64
369
+ },
370
+ "model.layers.11.mlp.up_proj": {
371
+ "bits": 6,
372
+ "group_size": 64
373
+ },
374
+ "model.layers.11.input_layernorm": false,
375
+ "model.layers.11.post_attention_layernorm": false,
376
+ "model.layers.11.pre_feedforward_layernorm": false,
377
+ "model.layers.11.post_feedforward_layernorm": false,
378
+ "model.layers.12.self_attn.q_proj": {
379
+ "bits": 6,
380
+ "group_size": 64
381
+ },
382
+ "model.layers.12.self_attn.k_proj": {
383
+ "bits": 6,
384
+ "group_size": 64
385
+ },
386
+ "model.layers.12.self_attn.v_proj": true,
387
+ "model.layers.12.self_attn.o_proj": {
388
+ "bits": 6,
389
+ "group_size": 64
390
+ },
391
+ "model.layers.12.self_attn.q_norm": false,
392
+ "model.layers.12.self_attn.k_norm": false,
393
+ "model.layers.12.self_attn.rope": false,
394
+ "model.layers.12.mlp.gate_proj": {
395
+ "bits": 6,
396
+ "group_size": 64
397
+ },
398
+ "model.layers.12.mlp.down_proj": {
399
+ "bits": 6,
400
+ "group_size": 64
401
+ },
402
+ "model.layers.12.mlp.up_proj": {
403
+ "bits": 6,
404
+ "group_size": 64
405
+ },
406
+ "model.layers.12.input_layernorm": false,
407
+ "model.layers.12.post_attention_layernorm": false,
408
+ "model.layers.12.pre_feedforward_layernorm": false,
409
+ "model.layers.12.post_feedforward_layernorm": false,
410
+ "model.layers.13.self_attn.q_proj": {
411
+ "bits": 6,
412
+ "group_size": 64
413
+ },
414
+ "model.layers.13.self_attn.k_proj": {
415
+ "bits": 6,
416
+ "group_size": 64
417
+ },
418
+ "model.layers.13.self_attn.v_proj": true,
419
+ "model.layers.13.self_attn.o_proj": {
420
+ "bits": 6,
421
+ "group_size": 64
422
+ },
423
+ "model.layers.13.self_attn.q_norm": false,
424
+ "model.layers.13.self_attn.k_norm": false,
425
+ "model.layers.13.self_attn.rope": false,
426
+ "model.layers.13.mlp.gate_proj": {
427
+ "bits": 6,
428
+ "group_size": 64
429
+ },
430
+ "model.layers.13.mlp.down_proj": {
431
+ "bits": 6,
432
+ "group_size": 64
433
+ },
434
+ "model.layers.13.mlp.up_proj": {
435
+ "bits": 6,
436
+ "group_size": 64
437
+ },
438
+ "model.layers.13.input_layernorm": false,
439
+ "model.layers.13.post_attention_layernorm": false,
440
+ "model.layers.13.pre_feedforward_layernorm": false,
441
+ "model.layers.13.post_feedforward_layernorm": false,
442
+ "model.layers.14.self_attn.q_proj": {
443
+ "bits": 6,
444
+ "group_size": 64
445
+ },
446
+ "model.layers.14.self_attn.k_proj": {
447
+ "bits": 6,
448
+ "group_size": 64
449
+ },
450
+ "model.layers.14.self_attn.v_proj": {
451
+ "bits": 6,
452
+ "group_size": 64
453
+ },
454
+ "model.layers.14.self_attn.o_proj": {
455
+ "bits": 6,
456
+ "group_size": 64
457
+ },
458
+ "model.layers.14.self_attn.q_norm": false,
459
+ "model.layers.14.self_attn.k_norm": false,
460
+ "model.layers.14.self_attn.rope": false,
461
+ "model.layers.14.mlp.gate_proj": {
462
+ "bits": 6,
463
+ "group_size": 64
464
+ },
465
+ "model.layers.14.mlp.down_proj": {
466
+ "bits": 6,
467
+ "group_size": 64
468
+ },
469
+ "model.layers.14.mlp.up_proj": {
470
+ "bits": 6,
471
+ "group_size": 64
472
+ },
473
+ "model.layers.14.input_layernorm": false,
474
+ "model.layers.14.post_attention_layernorm": false,
475
+ "model.layers.14.pre_feedforward_layernorm": false,
476
+ "model.layers.14.post_feedforward_layernorm": false,
477
+ "model.layers.15.self_attn.q_proj": {
478
+ "bits": 6,
479
+ "group_size": 64
480
+ },
481
+ "model.layers.15.self_attn.k_proj": {
482
+ "bits": 6,
483
+ "group_size": 64
484
+ },
485
+ "model.layers.15.self_attn.v_proj": true,
486
+ "model.layers.15.self_attn.o_proj": {
487
+ "bits": 6,
488
+ "group_size": 64
489
+ },
490
+ "model.layers.15.self_attn.q_norm": false,
491
+ "model.layers.15.self_attn.k_norm": false,
492
+ "model.layers.15.self_attn.rope": false,
493
+ "model.layers.15.mlp.gate_proj": {
494
+ "bits": 6,
495
+ "group_size": 64
496
+ },
497
+ "model.layers.15.mlp.down_proj": {
498
+ "bits": 6,
499
+ "group_size": 64
500
+ },
501
+ "model.layers.15.mlp.up_proj": {
502
+ "bits": 6,
503
+ "group_size": 64
504
+ },
505
+ "model.layers.15.input_layernorm": false,
506
+ "model.layers.15.post_attention_layernorm": false,
507
+ "model.layers.15.pre_feedforward_layernorm": false,
508
+ "model.layers.15.post_feedforward_layernorm": false,
509
+ "model.layers.16.self_attn.q_proj": {
510
+ "bits": 6,
511
+ "group_size": 64
512
+ },
513
+ "model.layers.16.self_attn.k_proj": {
514
+ "bits": 6,
515
+ "group_size": 64
516
+ },
517
+ "model.layers.16.self_attn.v_proj": {
518
+ "bits": 6,
519
+ "group_size": 64
520
+ },
521
+ "model.layers.16.self_attn.o_proj": {
522
+ "bits": 6,
523
+ "group_size": 64
524
+ },
525
+ "model.layers.16.self_attn.q_norm": false,
526
+ "model.layers.16.self_attn.k_norm": false,
527
+ "model.layers.16.self_attn.rope": false,
528
+ "model.layers.16.mlp.gate_proj": {
529
+ "bits": 6,
530
+ "group_size": 64
531
+ },
532
+ "model.layers.16.mlp.down_proj": {
533
+ "bits": 6,
534
+ "group_size": 64
535
+ },
536
+ "model.layers.16.mlp.up_proj": {
537
+ "bits": 6,
538
+ "group_size": 64
539
+ },
540
+ "model.layers.16.input_layernorm": false,
541
+ "model.layers.16.post_attention_layernorm": false,
542
+ "model.layers.16.pre_feedforward_layernorm": false,
543
+ "model.layers.16.post_feedforward_layernorm": false,
544
+ "model.layers.17.self_attn.q_proj": {
545
+ "bits": 6,
546
+ "group_size": 64
547
+ },
548
+ "model.layers.17.self_attn.k_proj": {
549
+ "bits": 6,
550
+ "group_size": 64
551
+ },
552
+ "model.layers.17.self_attn.v_proj": true,
553
+ "model.layers.17.self_attn.o_proj": {
554
+ "bits": 6,
555
+ "group_size": 64
556
+ },
557
+ "model.layers.17.self_attn.q_norm": false,
558
+ "model.layers.17.self_attn.k_norm": false,
559
+ "model.layers.17.self_attn.rope": false,
560
+ "model.layers.17.mlp.gate_proj": {
561
+ "bits": 6,
562
+ "group_size": 64
563
+ },
564
+ "model.layers.17.mlp.down_proj": {
565
+ "bits": 6,
566
+ "group_size": 64
567
+ },
568
+ "model.layers.17.mlp.up_proj": {
569
+ "bits": 6,
570
+ "group_size": 64
571
+ },
572
+ "model.layers.17.input_layernorm": false,
573
+ "model.layers.17.post_attention_layernorm": false,
574
+ "model.layers.17.pre_feedforward_layernorm": false,
575
+ "model.layers.17.post_feedforward_layernorm": false,
576
+ "model.layers.18.self_attn.q_proj": {
577
+ "bits": 6,
578
+ "group_size": 64
579
+ },
580
+ "model.layers.18.self_attn.k_proj": {
581
+ "bits": 6,
582
+ "group_size": 64
583
+ },
584
+ "model.layers.18.self_attn.v_proj": {
585
+ "bits": 6,
586
+ "group_size": 64
587
+ },
588
+ "model.layers.18.self_attn.o_proj": true,
589
+ "model.layers.18.self_attn.q_norm": false,
590
+ "model.layers.18.self_attn.k_norm": false,
591
+ "model.layers.18.self_attn.rope": false,
592
+ "model.layers.18.mlp.gate_proj": {
593
+ "bits": 6,
594
+ "group_size": 64
595
+ },
596
+ "model.layers.18.mlp.down_proj": {
597
+ "bits": 6,
598
+ "group_size": 64
599
+ },
600
+ "model.layers.18.mlp.up_proj": {
601
+ "bits": 6,
602
+ "group_size": 64
603
+ },
604
+ "model.layers.18.input_layernorm": false,
605
+ "model.layers.18.post_attention_layernorm": false,
606
+ "model.layers.18.pre_feedforward_layernorm": false,
607
+ "model.layers.18.post_feedforward_layernorm": false,
608
+ "model.layers.19.self_attn.q_proj": true,
609
+ "model.layers.19.self_attn.k_proj": true,
610
+ "model.layers.19.self_attn.v_proj": {
611
+ "bits": 6,
612
+ "group_size": 64
613
+ },
614
+ "model.layers.19.self_attn.o_proj": {
615
+ "bits": 6,
616
+ "group_size": 64
617
+ },
618
+ "model.layers.19.self_attn.q_norm": false,
619
+ "model.layers.19.self_attn.k_norm": false,
620
+ "model.layers.19.self_attn.rope": false,
621
+ "model.layers.19.mlp.gate_proj": {
622
+ "bits": 6,
623
+ "group_size": 64
624
+ },
625
+ "model.layers.19.mlp.down_proj": {
626
+ "bits": 6,
627
+ "group_size": 64
628
+ },
629
+ "model.layers.19.mlp.up_proj": {
630
+ "bits": 6,
631
+ "group_size": 64
632
+ },
633
+ "model.layers.19.input_layernorm": false,
634
+ "model.layers.19.post_attention_layernorm": false,
635
+ "model.layers.19.pre_feedforward_layernorm": false,
636
+ "model.layers.19.post_feedforward_layernorm": false,
637
+ "model.layers.20.self_attn.q_proj": {
638
+ "bits": 6,
639
+ "group_size": 64
640
+ },
641
+ "model.layers.20.self_attn.k_proj": true,
642
+ "model.layers.20.self_attn.v_proj": {
643
+ "bits": 6,
644
+ "group_size": 64
645
+ },
646
+ "model.layers.20.self_attn.o_proj": {
647
+ "bits": 6,
648
+ "group_size": 64
649
+ },
650
+ "model.layers.20.self_attn.q_norm": false,
651
+ "model.layers.20.self_attn.k_norm": false,
652
+ "model.layers.20.self_attn.rope": false,
653
+ "model.layers.20.mlp.gate_proj": {
654
+ "bits": 6,
655
+ "group_size": 64
656
+ },
657
+ "model.layers.20.mlp.down_proj": {
658
+ "bits": 6,
659
+ "group_size": 64
660
+ },
661
+ "model.layers.20.mlp.up_proj": {
662
+ "bits": 6,
663
+ "group_size": 64
664
+ },
665
+ "model.layers.20.input_layernorm": false,
666
+ "model.layers.20.post_attention_layernorm": false,
667
+ "model.layers.20.pre_feedforward_layernorm": false,
668
+ "model.layers.20.post_feedforward_layernorm": false,
669
+ "model.layers.21.self_attn.q_proj": {
670
+ "bits": 6,
671
+ "group_size": 64
672
+ },
673
+ "model.layers.21.self_attn.k_proj": {
674
+ "bits": 6,
675
+ "group_size": 64
676
+ },
677
+ "model.layers.21.self_attn.v_proj": {
678
+ "bits": 6,
679
+ "group_size": 64
680
+ },
681
+ "model.layers.21.self_attn.o_proj": {
682
+ "bits": 6,
683
+ "group_size": 64
684
+ },
685
+ "model.layers.21.self_attn.q_norm": false,
686
+ "model.layers.21.self_attn.k_norm": false,
687
+ "model.layers.21.self_attn.rope": false,
688
+ "model.layers.21.mlp.gate_proj": {
689
+ "bits": 6,
690
+ "group_size": 64
691
+ },
692
+ "model.layers.21.mlp.down_proj": {
693
+ "bits": 6,
694
+ "group_size": 64
695
+ },
696
+ "model.layers.21.mlp.up_proj": {
697
+ "bits": 6,
698
+ "group_size": 64
699
+ },
700
+ "model.layers.21.input_layernorm": false,
701
+ "model.layers.21.post_attention_layernorm": false,
702
+ "model.layers.21.pre_feedforward_layernorm": false,
703
+ "model.layers.21.post_feedforward_layernorm": false,
704
+ "model.layers.22.self_attn.q_proj": {
705
+ "bits": 6,
706
+ "group_size": 64
707
+ },
708
+ "model.layers.22.self_attn.k_proj": {
709
+ "bits": 6,
710
+ "group_size": 64
711
+ },
712
+ "model.layers.22.self_attn.v_proj": true,
713
+ "model.layers.22.self_attn.o_proj": {
714
+ "bits": 6,
715
+ "group_size": 64
716
+ },
717
+ "model.layers.22.self_attn.q_norm": false,
718
+ "model.layers.22.self_attn.k_norm": false,
719
+ "model.layers.22.self_attn.rope": false,
720
+ "model.layers.22.mlp.gate_proj": {
721
+ "bits": 6,
722
+ "group_size": 64
723
+ },
724
+ "model.layers.22.mlp.down_proj": {
725
+ "bits": 6,
726
+ "group_size": 64
727
+ },
728
+ "model.layers.22.mlp.up_proj": {
729
+ "bits": 6,
730
+ "group_size": 64
731
+ },
732
+ "model.layers.22.input_layernorm": false,
733
+ "model.layers.22.post_attention_layernorm": false,
734
+ "model.layers.22.pre_feedforward_layernorm": false,
735
+ "model.layers.22.post_feedforward_layernorm": false,
736
+ "model.layers.23.self_attn.q_proj": {
737
+ "bits": 6,
738
+ "group_size": 64
739
+ },
740
+ "model.layers.23.self_attn.k_proj": {
741
+ "bits": 6,
742
+ "group_size": 64
743
+ },
744
+ "model.layers.23.self_attn.v_proj": true,
745
+ "model.layers.23.self_attn.o_proj": {
746
+ "bits": 6,
747
+ "group_size": 64
748
+ },
749
+ "model.layers.23.self_attn.q_norm": false,
750
+ "model.layers.23.self_attn.k_norm": false,
751
+ "model.layers.23.self_attn.rope": false,
752
+ "model.layers.23.mlp.gate_proj": {
753
+ "bits": 6,
754
+ "group_size": 64
755
+ },
756
+ "model.layers.23.mlp.down_proj": true,
757
+ "model.layers.23.mlp.up_proj": {
758
+ "bits": 6,
759
+ "group_size": 64
760
+ },
761
+ "model.layers.23.input_layernorm": false,
762
+ "model.layers.23.post_attention_layernorm": false,
763
+ "model.layers.23.pre_feedforward_layernorm": false,
764
+ "model.layers.23.post_feedforward_layernorm": false,
765
+ "model.layers.24.self_attn.q_proj": {
766
+ "bits": 6,
767
+ "group_size": 64
768
+ },
769
+ "model.layers.24.self_attn.k_proj": {
770
+ "bits": 6,
771
+ "group_size": 64
772
+ },
773
+ "model.layers.24.self_attn.v_proj": true,
774
+ "model.layers.24.self_attn.o_proj": true,
775
+ "model.layers.24.self_attn.q_norm": false,
776
+ "model.layers.24.self_attn.k_norm": false,
777
+ "model.layers.24.self_attn.rope": false,
778
+ "model.layers.24.mlp.gate_proj": {
779
+ "bits": 6,
780
+ "group_size": 64
781
+ },
782
+ "model.layers.24.mlp.down_proj": {
783
+ "bits": 6,
784
+ "group_size": 64
785
+ },
786
+ "model.layers.24.mlp.up_proj": {
787
+ "bits": 6,
788
+ "group_size": 64
789
+ },
790
+ "model.layers.24.input_layernorm": false,
791
+ "model.layers.24.post_attention_layernorm": false,
792
+ "model.layers.24.pre_feedforward_layernorm": false,
793
+ "model.layers.24.post_feedforward_layernorm": false,
794
+ "model.layers.25.self_attn.q_proj": {
795
+ "bits": 6,
796
+ "group_size": 64
797
+ },
798
+ "model.layers.25.self_attn.k_proj": {
799
+ "bits": 6,
800
+ "group_size": 64
801
+ },
802
+ "model.layers.25.self_attn.v_proj": true,
803
+ "model.layers.25.self_attn.o_proj": true,
804
+ "model.layers.25.self_attn.q_norm": false,
805
+ "model.layers.25.self_attn.k_norm": false,
806
+ "model.layers.25.self_attn.rope": false,
807
+ "model.layers.25.mlp.gate_proj": true,
808
+ "model.layers.25.mlp.down_proj": true,
809
+ "model.layers.25.mlp.up_proj": true,
810
+ "model.layers.25.input_layernorm": false,
811
+ "model.layers.25.post_attention_layernorm": false,
812
+ "model.layers.25.pre_feedforward_layernorm": false,
813
+ "model.layers.25.post_feedforward_layernorm": false,
814
+ "model.norm": false,
815
+ "lm_head": {
816
+ "bits": 6,
817
+ "group_size": 64
818
+ }
819
+ },
820
+ "quantization_config": {
821
+ "group_size": 64,
822
+ "bits": 2,
823
+ "model.embed_tokens": {
824
+ "bits": 6,
825
+ "group_size": 64
826
+ },
827
+ "model.layers.0.self_attn.q_proj": {
828
+ "bits": 6,
829
+ "group_size": 64
830
+ },
831
+ "model.layers.0.self_attn.k_proj": true,
832
+ "model.layers.0.self_attn.v_proj": {
833
+ "bits": 6,
834
+ "group_size": 64
835
+ },
836
+ "model.layers.0.self_attn.o_proj": {
837
+ "bits": 6,
838
+ "group_size": 64
839
+ },
840
+ "model.layers.0.self_attn.q_norm": false,
841
+ "model.layers.0.self_attn.k_norm": false,
842
+ "model.layers.0.self_attn.rope": false,
843
+ "model.layers.0.mlp.gate_proj": {
844
+ "bits": 6,
845
+ "group_size": 64
846
+ },
847
+ "model.layers.0.mlp.down_proj": true,
848
+ "model.layers.0.mlp.up_proj": {
849
+ "bits": 6,
850
+ "group_size": 64
851
+ },
852
+ "model.layers.0.input_layernorm": false,
853
+ "model.layers.0.post_attention_layernorm": false,
854
+ "model.layers.0.pre_feedforward_layernorm": false,
855
+ "model.layers.0.post_feedforward_layernorm": false,
856
+ "model.layers.1.self_attn.q_proj": {
857
+ "bits": 6,
858
+ "group_size": 64
859
+ },
860
+ "model.layers.1.self_attn.k_proj": true,
861
+ "model.layers.1.self_attn.v_proj": {
862
+ "bits": 6,
863
+ "group_size": 64
864
+ },
865
+ "model.layers.1.self_attn.o_proj": {
866
+ "bits": 6,
867
+ "group_size": 64
868
+ },
869
+ "model.layers.1.self_attn.q_norm": false,
870
+ "model.layers.1.self_attn.k_norm": false,
871
+ "model.layers.1.self_attn.rope": false,
872
+ "model.layers.1.mlp.gate_proj": true,
873
+ "model.layers.1.mlp.down_proj": true,
874
+ "model.layers.1.mlp.up_proj": {
875
+ "bits": 6,
876
+ "group_size": 64
877
+ },
878
+ "model.layers.1.input_layernorm": false,
879
+ "model.layers.1.post_attention_layernorm": false,
880
+ "model.layers.1.pre_feedforward_layernorm": false,
881
+ "model.layers.1.post_feedforward_layernorm": false,
882
+ "model.layers.2.self_attn.q_proj": true,
883
+ "model.layers.2.self_attn.k_proj": true,
884
+ "model.layers.2.self_attn.v_proj": {
885
+ "bits": 6,
886
+ "group_size": 64
887
+ },
888
+ "model.layers.2.self_attn.o_proj": true,
889
+ "model.layers.2.self_attn.q_norm": false,
890
+ "model.layers.2.self_attn.k_norm": false,
891
+ "model.layers.2.self_attn.rope": false,
892
+ "model.layers.2.mlp.gate_proj": true,
893
+ "model.layers.2.mlp.down_proj": {
894
+ "bits": 6,
895
+ "group_size": 64
896
+ },
897
+ "model.layers.2.mlp.up_proj": {
898
+ "bits": 6,
899
+ "group_size": 64
900
+ },
901
+ "model.layers.2.input_layernorm": false,
902
+ "model.layers.2.post_attention_layernorm": false,
903
+ "model.layers.2.pre_feedforward_layernorm": false,
904
+ "model.layers.2.post_feedforward_layernorm": false,
905
+ "model.layers.3.self_attn.q_proj": {
906
+ "bits": 6,
907
+ "group_size": 64
908
+ },
909
+ "model.layers.3.self_attn.k_proj": {
910
+ "bits": 6,
911
+ "group_size": 64
912
+ },
913
+ "model.layers.3.self_attn.v_proj": {
914
+ "bits": 6,
915
+ "group_size": 64
916
+ },
917
+ "model.layers.3.self_attn.o_proj": true,
918
+ "model.layers.3.self_attn.q_norm": false,
919
+ "model.layers.3.self_attn.k_norm": false,
920
+ "model.layers.3.self_attn.rope": false,
921
+ "model.layers.3.mlp.gate_proj": {
922
+ "bits": 6,
923
+ "group_size": 64
924
+ },
925
+ "model.layers.3.mlp.down_proj": {
926
+ "bits": 6,
927
+ "group_size": 64
928
+ },
929
+ "model.layers.3.mlp.up_proj": true,
930
+ "model.layers.3.input_layernorm": false,
931
+ "model.layers.3.post_attention_layernorm": false,
932
+ "model.layers.3.pre_feedforward_layernorm": false,
933
+ "model.layers.3.post_feedforward_layernorm": false,
934
+ "model.layers.4.self_attn.q_proj": {
935
+ "bits": 6,
936
+ "group_size": 64
937
+ },
938
+ "model.layers.4.self_attn.k_proj": {
939
+ "bits": 6,
940
+ "group_size": 64
941
+ },
942
+ "model.layers.4.self_attn.v_proj": true,
943
+ "model.layers.4.self_attn.o_proj": true,
944
+ "model.layers.4.self_attn.q_norm": false,
945
+ "model.layers.4.self_attn.k_norm": false,
946
+ "model.layers.4.self_attn.rope": false,
947
+ "model.layers.4.mlp.gate_proj": {
948
+ "bits": 6,
949
+ "group_size": 64
950
+ },
951
+ "model.layers.4.mlp.down_proj": true,
952
+ "model.layers.4.mlp.up_proj": {
953
+ "bits": 6,
954
+ "group_size": 64
955
+ },
956
+ "model.layers.4.input_layernorm": false,
957
+ "model.layers.4.post_attention_layernorm": false,
958
+ "model.layers.4.pre_feedforward_layernorm": false,
959
+ "model.layers.4.post_feedforward_layernorm": false,
960
+ "model.layers.5.self_attn.q_proj": {
961
+ "bits": 6,
962
+ "group_size": 64
963
+ },
964
+ "model.layers.5.self_attn.k_proj": {
965
+ "bits": 6,
966
+ "group_size": 64
967
+ },
968
+ "model.layers.5.self_attn.v_proj": {
969
+ "bits": 6,
970
+ "group_size": 64
971
+ },
972
+ "model.layers.5.self_attn.o_proj": {
973
+ "bits": 6,
974
+ "group_size": 64
975
+ },
976
+ "model.layers.5.self_attn.q_norm": false,
977
+ "model.layers.5.self_attn.k_norm": false,
978
+ "model.layers.5.self_attn.rope": false,
979
+ "model.layers.5.mlp.gate_proj": true,
980
+ "model.layers.5.mlp.down_proj": {
981
+ "bits": 6,
982
+ "group_size": 64
983
+ },
984
+ "model.layers.5.mlp.up_proj": true,
985
+ "model.layers.5.input_layernorm": false,
986
+ "model.layers.5.post_attention_layernorm": false,
987
+ "model.layers.5.pre_feedforward_layernorm": false,
988
+ "model.layers.5.post_feedforward_layernorm": false,
989
+ "model.layers.6.self_attn.q_proj": true,
990
+ "model.layers.6.self_attn.k_proj": {
991
+ "bits": 6,
992
+ "group_size": 64
993
+ },
994
+ "model.layers.6.self_attn.v_proj": {
995
+ "bits": 6,
996
+ "group_size": 64
997
+ },
998
+ "model.layers.6.self_attn.o_proj": {
999
+ "bits": 6,
1000
+ "group_size": 64
1001
+ },
1002
+ "model.layers.6.self_attn.q_norm": false,
1003
+ "model.layers.6.self_attn.k_norm": false,
1004
+ "model.layers.6.self_attn.rope": false,
1005
+ "model.layers.6.mlp.gate_proj": {
1006
+ "bits": 6,
1007
+ "group_size": 64
1008
+ },
1009
+ "model.layers.6.mlp.down_proj": true,
1010
+ "model.layers.6.mlp.up_proj": {
1011
+ "bits": 6,
1012
+ "group_size": 64
1013
+ },
1014
+ "model.layers.6.input_layernorm": false,
1015
+ "model.layers.6.post_attention_layernorm": false,
1016
+ "model.layers.6.pre_feedforward_layernorm": false,
1017
+ "model.layers.6.post_feedforward_layernorm": false,
1018
+ "model.layers.7.self_attn.q_proj": true,
1019
+ "model.layers.7.self_attn.k_proj": {
1020
+ "bits": 6,
1021
+ "group_size": 64
1022
+ },
1023
+ "model.layers.7.self_attn.v_proj": true,
1024
+ "model.layers.7.self_attn.o_proj": {
1025
+ "bits": 6,
1026
+ "group_size": 64
1027
+ },
1028
+ "model.layers.7.self_attn.q_norm": false,
1029
+ "model.layers.7.self_attn.k_norm": false,
1030
+ "model.layers.7.self_attn.rope": false,
1031
+ "model.layers.7.mlp.gate_proj": {
1032
+ "bits": 6,
1033
+ "group_size": 64
1034
+ },
1035
+ "model.layers.7.mlp.down_proj": {
1036
+ "bits": 6,
1037
+ "group_size": 64
1038
+ },
1039
+ "model.layers.7.mlp.up_proj": {
1040
+ "bits": 6,
1041
+ "group_size": 64
1042
+ },
1043
+ "model.layers.7.input_layernorm": false,
1044
+ "model.layers.7.post_attention_layernorm": false,
1045
+ "model.layers.7.pre_feedforward_layernorm": false,
1046
+ "model.layers.7.post_feedforward_layernorm": false,
1047
+ "model.layers.8.self_attn.q_proj": true,
1048
+ "model.layers.8.self_attn.k_proj": {
1049
+ "bits": 6,
1050
+ "group_size": 64
1051
+ },
1052
+ "model.layers.8.self_attn.v_proj": {
1053
+ "bits": 6,
1054
+ "group_size": 64
1055
+ },
1056
+ "model.layers.8.self_attn.o_proj": {
1057
+ "bits": 6,
1058
+ "group_size": 64
1059
+ },
1060
+ "model.layers.8.self_attn.q_norm": false,
1061
+ "model.layers.8.self_attn.k_norm": false,
1062
+ "model.layers.8.self_attn.rope": false,
1063
+ "model.layers.8.mlp.gate_proj": {
1064
+ "bits": 6,
1065
+ "group_size": 64
1066
+ },
1067
+ "model.layers.8.mlp.down_proj": {
1068
+ "bits": 6,
1069
+ "group_size": 64
1070
+ },
1071
+ "model.layers.8.mlp.up_proj": {
1072
+ "bits": 6,
1073
+ "group_size": 64
1074
+ },
1075
+ "model.layers.8.input_layernorm": false,
1076
+ "model.layers.8.post_attention_layernorm": false,
1077
+ "model.layers.8.pre_feedforward_layernorm": false,
1078
+ "model.layers.8.post_feedforward_layernorm": false,
1079
+ "model.layers.9.self_attn.q_proj": {
1080
+ "bits": 6,
1081
+ "group_size": 64
1082
+ },
1083
+ "model.layers.9.self_attn.k_proj": {
1084
+ "bits": 6,
1085
+ "group_size": 64
1086
+ },
1087
+ "model.layers.9.self_attn.v_proj": {
1088
+ "bits": 6,
1089
+ "group_size": 64
1090
+ },
1091
+ "model.layers.9.self_attn.o_proj": true,
1092
+ "model.layers.9.self_attn.q_norm": false,
1093
+ "model.layers.9.self_attn.k_norm": false,
1094
+ "model.layers.9.self_attn.rope": false,
1095
+ "model.layers.9.mlp.gate_proj": {
1096
+ "bits": 6,
1097
+ "group_size": 64
1098
+ },
1099
+ "model.layers.9.mlp.down_proj": {
1100
+ "bits": 6,
1101
+ "group_size": 64
1102
+ },
1103
+ "model.layers.9.mlp.up_proj": true,
1104
+ "model.layers.9.input_layernorm": false,
1105
+ "model.layers.9.post_attention_layernorm": false,
1106
+ "model.layers.9.pre_feedforward_layernorm": false,
1107
+ "model.layers.9.post_feedforward_layernorm": false,
1108
+ "model.layers.10.self_attn.q_proj": {
1109
+ "bits": 6,
1110
+ "group_size": 64
1111
+ },
1112
+ "model.layers.10.self_attn.k_proj": {
1113
+ "bits": 6,
1114
+ "group_size": 64
1115
+ },
1116
+ "model.layers.10.self_attn.v_proj": true,
1117
+ "model.layers.10.self_attn.o_proj": {
1118
+ "bits": 6,
1119
+ "group_size": 64
1120
+ },
1121
+ "model.layers.10.self_attn.q_norm": false,
1122
+ "model.layers.10.self_attn.k_norm": false,
1123
+ "model.layers.10.self_attn.rope": false,
1124
+ "model.layers.10.mlp.gate_proj": {
1125
+ "bits": 6,
1126
+ "group_size": 64
1127
+ },
1128
+ "model.layers.10.mlp.down_proj": {
1129
+ "bits": 6,
1130
+ "group_size": 64
1131
+ },
1132
+ "model.layers.10.mlp.up_proj": {
1133
+ "bits": 6,
1134
+ "group_size": 64
1135
+ },
1136
+ "model.layers.10.input_layernorm": false,
1137
+ "model.layers.10.post_attention_layernorm": false,
1138
+ "model.layers.10.pre_feedforward_layernorm": false,
1139
+ "model.layers.10.post_feedforward_layernorm": false,
1140
+ "model.layers.11.self_attn.q_proj": {
1141
+ "bits": 6,
1142
+ "group_size": 64
1143
+ },
1144
+ "model.layers.11.self_attn.k_proj": true,
1145
+ "model.layers.11.self_attn.v_proj": {
1146
+ "bits": 6,
1147
+ "group_size": 64
1148
+ },
1149
+ "model.layers.11.self_attn.o_proj": {
1150
+ "bits": 6,
1151
+ "group_size": 64
1152
+ },
1153
+ "model.layers.11.self_attn.q_norm": false,
1154
+ "model.layers.11.self_attn.k_norm": false,
1155
+ "model.layers.11.self_attn.rope": false,
1156
+ "model.layers.11.mlp.gate_proj": {
1157
+ "bits": 6,
1158
+ "group_size": 64
1159
+ },
1160
+ "model.layers.11.mlp.down_proj": {
1161
+ "bits": 6,
1162
+ "group_size": 64
1163
+ },
1164
+ "model.layers.11.mlp.up_proj": {
1165
+ "bits": 6,
1166
+ "group_size": 64
1167
+ },
1168
+ "model.layers.11.input_layernorm": false,
1169
+ "model.layers.11.post_attention_layernorm": false,
1170
+ "model.layers.11.pre_feedforward_layernorm": false,
1171
+ "model.layers.11.post_feedforward_layernorm": false,
1172
+ "model.layers.12.self_attn.q_proj": {
1173
+ "bits": 6,
1174
+ "group_size": 64
1175
+ },
1176
+ "model.layers.12.self_attn.k_proj": {
1177
+ "bits": 6,
1178
+ "group_size": 64
1179
+ },
1180
+ "model.layers.12.self_attn.v_proj": true,
1181
+ "model.layers.12.self_attn.o_proj": {
1182
+ "bits": 6,
1183
+ "group_size": 64
1184
+ },
1185
+ "model.layers.12.self_attn.q_norm": false,
1186
+ "model.layers.12.self_attn.k_norm": false,
1187
+ "model.layers.12.self_attn.rope": false,
1188
+ "model.layers.12.mlp.gate_proj": {
1189
+ "bits": 6,
1190
+ "group_size": 64
1191
+ },
1192
+ "model.layers.12.mlp.down_proj": {
1193
+ "bits": 6,
1194
+ "group_size": 64
1195
+ },
1196
+ "model.layers.12.mlp.up_proj": {
1197
+ "bits": 6,
1198
+ "group_size": 64
1199
+ },
1200
+ "model.layers.12.input_layernorm": false,
1201
+ "model.layers.12.post_attention_layernorm": false,
1202
+ "model.layers.12.pre_feedforward_layernorm": false,
1203
+ "model.layers.12.post_feedforward_layernorm": false,
1204
+ "model.layers.13.self_attn.q_proj": {
1205
+ "bits": 6,
1206
+ "group_size": 64
1207
+ },
1208
+ "model.layers.13.self_attn.k_proj": {
1209
+ "bits": 6,
1210
+ "group_size": 64
1211
+ },
1212
+ "model.layers.13.self_attn.v_proj": true,
1213
+ "model.layers.13.self_attn.o_proj": {
1214
+ "bits": 6,
1215
+ "group_size": 64
1216
+ },
1217
+ "model.layers.13.self_attn.q_norm": false,
1218
+ "model.layers.13.self_attn.k_norm": false,
1219
+ "model.layers.13.self_attn.rope": false,
1220
+ "model.layers.13.mlp.gate_proj": {
1221
+ "bits": 6,
1222
+ "group_size": 64
1223
+ },
1224
+ "model.layers.13.mlp.down_proj": {
1225
+ "bits": 6,
1226
+ "group_size": 64
1227
+ },
1228
+ "model.layers.13.mlp.up_proj": {
1229
+ "bits": 6,
1230
+ "group_size": 64
1231
+ },
1232
+ "model.layers.13.input_layernorm": false,
1233
+ "model.layers.13.post_attention_layernorm": false,
1234
+ "model.layers.13.pre_feedforward_layernorm": false,
1235
+ "model.layers.13.post_feedforward_layernorm": false,
1236
+ "model.layers.14.self_attn.q_proj": {
1237
+ "bits": 6,
1238
+ "group_size": 64
1239
+ },
1240
+ "model.layers.14.self_attn.k_proj": {
1241
+ "bits": 6,
1242
+ "group_size": 64
1243
+ },
1244
+ "model.layers.14.self_attn.v_proj": {
1245
+ "bits": 6,
1246
+ "group_size": 64
1247
+ },
1248
+ "model.layers.14.self_attn.o_proj": {
1249
+ "bits": 6,
1250
+ "group_size": 64
1251
+ },
1252
+ "model.layers.14.self_attn.q_norm": false,
1253
+ "model.layers.14.self_attn.k_norm": false,
1254
+ "model.layers.14.self_attn.rope": false,
1255
+ "model.layers.14.mlp.gate_proj": {
1256
+ "bits": 6,
1257
+ "group_size": 64
1258
+ },
1259
+ "model.layers.14.mlp.down_proj": {
1260
+ "bits": 6,
1261
+ "group_size": 64
1262
+ },
1263
+ "model.layers.14.mlp.up_proj": {
1264
+ "bits": 6,
1265
+ "group_size": 64
1266
+ },
1267
+ "model.layers.14.input_layernorm": false,
1268
+ "model.layers.14.post_attention_layernorm": false,
1269
+ "model.layers.14.pre_feedforward_layernorm": false,
1270
+ "model.layers.14.post_feedforward_layernorm": false,
1271
+ "model.layers.15.self_attn.q_proj": {
1272
+ "bits": 6,
1273
+ "group_size": 64
1274
+ },
1275
+ "model.layers.15.self_attn.k_proj": {
1276
+ "bits": 6,
1277
+ "group_size": 64
1278
+ },
1279
+ "model.layers.15.self_attn.v_proj": true,
1280
+ "model.layers.15.self_attn.o_proj": {
1281
+ "bits": 6,
1282
+ "group_size": 64
1283
+ },
1284
+ "model.layers.15.self_attn.q_norm": false,
1285
+ "model.layers.15.self_attn.k_norm": false,
1286
+ "model.layers.15.self_attn.rope": false,
1287
+ "model.layers.15.mlp.gate_proj": {
1288
+ "bits": 6,
1289
+ "group_size": 64
1290
+ },
1291
+ "model.layers.15.mlp.down_proj": {
1292
+ "bits": 6,
1293
+ "group_size": 64
1294
+ },
1295
+ "model.layers.15.mlp.up_proj": {
1296
+ "bits": 6,
1297
+ "group_size": 64
1298
+ },
1299
+ "model.layers.15.input_layernorm": false,
1300
+ "model.layers.15.post_attention_layernorm": false,
1301
+ "model.layers.15.pre_feedforward_layernorm": false,
1302
+ "model.layers.15.post_feedforward_layernorm": false,
1303
+ "model.layers.16.self_attn.q_proj": {
1304
+ "bits": 6,
1305
+ "group_size": 64
1306
+ },
1307
+ "model.layers.16.self_attn.k_proj": {
1308
+ "bits": 6,
1309
+ "group_size": 64
1310
+ },
1311
+ "model.layers.16.self_attn.v_proj": {
1312
+ "bits": 6,
1313
+ "group_size": 64
1314
+ },
1315
+ "model.layers.16.self_attn.o_proj": {
1316
+ "bits": 6,
1317
+ "group_size": 64
1318
+ },
1319
+ "model.layers.16.self_attn.q_norm": false,
1320
+ "model.layers.16.self_attn.k_norm": false,
1321
+ "model.layers.16.self_attn.rope": false,
1322
+ "model.layers.16.mlp.gate_proj": {
1323
+ "bits": 6,
1324
+ "group_size": 64
1325
+ },
1326
+ "model.layers.16.mlp.down_proj": {
1327
+ "bits": 6,
1328
+ "group_size": 64
1329
+ },
1330
+ "model.layers.16.mlp.up_proj": {
1331
+ "bits": 6,
1332
+ "group_size": 64
1333
+ },
1334
+ "model.layers.16.input_layernorm": false,
1335
+ "model.layers.16.post_attention_layernorm": false,
1336
+ "model.layers.16.pre_feedforward_layernorm": false,
1337
+ "model.layers.16.post_feedforward_layernorm": false,
1338
+ "model.layers.17.self_attn.q_proj": {
1339
+ "bits": 6,
1340
+ "group_size": 64
1341
+ },
1342
+ "model.layers.17.self_attn.k_proj": {
1343
+ "bits": 6,
1344
+ "group_size": 64
1345
+ },
1346
+ "model.layers.17.self_attn.v_proj": true,
1347
+ "model.layers.17.self_attn.o_proj": {
1348
+ "bits": 6,
1349
+ "group_size": 64
1350
+ },
1351
+ "model.layers.17.self_attn.q_norm": false,
1352
+ "model.layers.17.self_attn.k_norm": false,
1353
+ "model.layers.17.self_attn.rope": false,
1354
+ "model.layers.17.mlp.gate_proj": {
1355
+ "bits": 6,
1356
+ "group_size": 64
1357
+ },
1358
+ "model.layers.17.mlp.down_proj": {
1359
+ "bits": 6,
1360
+ "group_size": 64
1361
+ },
1362
+ "model.layers.17.mlp.up_proj": {
1363
+ "bits": 6,
1364
+ "group_size": 64
1365
+ },
1366
+ "model.layers.17.input_layernorm": false,
1367
+ "model.layers.17.post_attention_layernorm": false,
1368
+ "model.layers.17.pre_feedforward_layernorm": false,
1369
+ "model.layers.17.post_feedforward_layernorm": false,
1370
+ "model.layers.18.self_attn.q_proj": {
1371
+ "bits": 6,
1372
+ "group_size": 64
1373
+ },
1374
+ "model.layers.18.self_attn.k_proj": {
1375
+ "bits": 6,
1376
+ "group_size": 64
1377
+ },
1378
+ "model.layers.18.self_attn.v_proj": {
1379
+ "bits": 6,
1380
+ "group_size": 64
1381
+ },
1382
+ "model.layers.18.self_attn.o_proj": true,
1383
+ "model.layers.18.self_attn.q_norm": false,
1384
+ "model.layers.18.self_attn.k_norm": false,
1385
+ "model.layers.18.self_attn.rope": false,
1386
+ "model.layers.18.mlp.gate_proj": {
1387
+ "bits": 6,
1388
+ "group_size": 64
1389
+ },
1390
+ "model.layers.18.mlp.down_proj": {
1391
+ "bits": 6,
1392
+ "group_size": 64
1393
+ },
1394
+ "model.layers.18.mlp.up_proj": {
1395
+ "bits": 6,
1396
+ "group_size": 64
1397
+ },
1398
+ "model.layers.18.input_layernorm": false,
1399
+ "model.layers.18.post_attention_layernorm": false,
1400
+ "model.layers.18.pre_feedforward_layernorm": false,
1401
+ "model.layers.18.post_feedforward_layernorm": false,
1402
+ "model.layers.19.self_attn.q_proj": true,
1403
+ "model.layers.19.self_attn.k_proj": true,
1404
+ "model.layers.19.self_attn.v_proj": {
1405
+ "bits": 6,
1406
+ "group_size": 64
1407
+ },
1408
+ "model.layers.19.self_attn.o_proj": {
1409
+ "bits": 6,
1410
+ "group_size": 64
1411
+ },
1412
+ "model.layers.19.self_attn.q_norm": false,
1413
+ "model.layers.19.self_attn.k_norm": false,
1414
+ "model.layers.19.self_attn.rope": false,
1415
+ "model.layers.19.mlp.gate_proj": {
1416
+ "bits": 6,
1417
+ "group_size": 64
1418
+ },
1419
+ "model.layers.19.mlp.down_proj": {
1420
+ "bits": 6,
1421
+ "group_size": 64
1422
+ },
1423
+ "model.layers.19.mlp.up_proj": {
1424
+ "bits": 6,
1425
+ "group_size": 64
1426
+ },
1427
+ "model.layers.19.input_layernorm": false,
1428
+ "model.layers.19.post_attention_layernorm": false,
1429
+ "model.layers.19.pre_feedforward_layernorm": false,
1430
+ "model.layers.19.post_feedforward_layernorm": false,
1431
+ "model.layers.20.self_attn.q_proj": {
1432
+ "bits": 6,
1433
+ "group_size": 64
1434
+ },
1435
+ "model.layers.20.self_attn.k_proj": true,
1436
+ "model.layers.20.self_attn.v_proj": {
1437
+ "bits": 6,
1438
+ "group_size": 64
1439
+ },
1440
+ "model.layers.20.self_attn.o_proj": {
1441
+ "bits": 6,
1442
+ "group_size": 64
1443
+ },
1444
+ "model.layers.20.self_attn.q_norm": false,
1445
+ "model.layers.20.self_attn.k_norm": false,
1446
+ "model.layers.20.self_attn.rope": false,
1447
+ "model.layers.20.mlp.gate_proj": {
1448
+ "bits": 6,
1449
+ "group_size": 64
1450
+ },
1451
+ "model.layers.20.mlp.down_proj": {
1452
+ "bits": 6,
1453
+ "group_size": 64
1454
+ },
1455
+ "model.layers.20.mlp.up_proj": {
1456
+ "bits": 6,
1457
+ "group_size": 64
1458
+ },
1459
+ "model.layers.20.input_layernorm": false,
1460
+ "model.layers.20.post_attention_layernorm": false,
1461
+ "model.layers.20.pre_feedforward_layernorm": false,
1462
+ "model.layers.20.post_feedforward_layernorm": false,
1463
+ "model.layers.21.self_attn.q_proj": {
1464
+ "bits": 6,
1465
+ "group_size": 64
1466
+ },
1467
+ "model.layers.21.self_attn.k_proj": {
1468
+ "bits": 6,
1469
+ "group_size": 64
1470
+ },
1471
+ "model.layers.21.self_attn.v_proj": {
1472
+ "bits": 6,
1473
+ "group_size": 64
1474
+ },
1475
+ "model.layers.21.self_attn.o_proj": {
1476
+ "bits": 6,
1477
+ "group_size": 64
1478
+ },
1479
+ "model.layers.21.self_attn.q_norm": false,
1480
+ "model.layers.21.self_attn.k_norm": false,
1481
+ "model.layers.21.self_attn.rope": false,
1482
+ "model.layers.21.mlp.gate_proj": {
1483
+ "bits": 6,
1484
+ "group_size": 64
1485
+ },
1486
+ "model.layers.21.mlp.down_proj": {
1487
+ "bits": 6,
1488
+ "group_size": 64
1489
+ },
1490
+ "model.layers.21.mlp.up_proj": {
1491
+ "bits": 6,
1492
+ "group_size": 64
1493
+ },
1494
+ "model.layers.21.input_layernorm": false,
1495
+ "model.layers.21.post_attention_layernorm": false,
1496
+ "model.layers.21.pre_feedforward_layernorm": false,
1497
+ "model.layers.21.post_feedforward_layernorm": false,
1498
+ "model.layers.22.self_attn.q_proj": {
1499
+ "bits": 6,
1500
+ "group_size": 64
1501
+ },
1502
+ "model.layers.22.self_attn.k_proj": {
1503
+ "bits": 6,
1504
+ "group_size": 64
1505
+ },
1506
+ "model.layers.22.self_attn.v_proj": true,
1507
+ "model.layers.22.self_attn.o_proj": {
1508
+ "bits": 6,
1509
+ "group_size": 64
1510
+ },
1511
+ "model.layers.22.self_attn.q_norm": false,
1512
+ "model.layers.22.self_attn.k_norm": false,
1513
+ "model.layers.22.self_attn.rope": false,
1514
+ "model.layers.22.mlp.gate_proj": {
1515
+ "bits": 6,
1516
+ "group_size": 64
1517
+ },
1518
+ "model.layers.22.mlp.down_proj": {
1519
+ "bits": 6,
1520
+ "group_size": 64
1521
+ },
1522
+ "model.layers.22.mlp.up_proj": {
1523
+ "bits": 6,
1524
+ "group_size": 64
1525
+ },
1526
+ "model.layers.22.input_layernorm": false,
1527
+ "model.layers.22.post_attention_layernorm": false,
1528
+ "model.layers.22.pre_feedforward_layernorm": false,
1529
+ "model.layers.22.post_feedforward_layernorm": false,
1530
+ "model.layers.23.self_attn.q_proj": {
1531
+ "bits": 6,
1532
+ "group_size": 64
1533
+ },
1534
+ "model.layers.23.self_attn.k_proj": {
1535
+ "bits": 6,
1536
+ "group_size": 64
1537
+ },
1538
+ "model.layers.23.self_attn.v_proj": true,
1539
+ "model.layers.23.self_attn.o_proj": {
1540
+ "bits": 6,
1541
+ "group_size": 64
1542
+ },
1543
+ "model.layers.23.self_attn.q_norm": false,
1544
+ "model.layers.23.self_attn.k_norm": false,
1545
+ "model.layers.23.self_attn.rope": false,
1546
+ "model.layers.23.mlp.gate_proj": {
1547
+ "bits": 6,
1548
+ "group_size": 64
1549
+ },
1550
+ "model.layers.23.mlp.down_proj": true,
1551
+ "model.layers.23.mlp.up_proj": {
1552
+ "bits": 6,
1553
+ "group_size": 64
1554
+ },
1555
+ "model.layers.23.input_layernorm": false,
1556
+ "model.layers.23.post_attention_layernorm": false,
1557
+ "model.layers.23.pre_feedforward_layernorm": false,
1558
+ "model.layers.23.post_feedforward_layernorm": false,
1559
+ "model.layers.24.self_attn.q_proj": {
1560
+ "bits": 6,
1561
+ "group_size": 64
1562
+ },
1563
+ "model.layers.24.self_attn.k_proj": {
1564
+ "bits": 6,
1565
+ "group_size": 64
1566
+ },
1567
+ "model.layers.24.self_attn.v_proj": true,
1568
+ "model.layers.24.self_attn.o_proj": true,
1569
+ "model.layers.24.self_attn.q_norm": false,
1570
+ "model.layers.24.self_attn.k_norm": false,
1571
+ "model.layers.24.self_attn.rope": false,
1572
+ "model.layers.24.mlp.gate_proj": {
1573
+ "bits": 6,
1574
+ "group_size": 64
1575
+ },
1576
+ "model.layers.24.mlp.down_proj": {
1577
+ "bits": 6,
1578
+ "group_size": 64
1579
+ },
1580
+ "model.layers.24.mlp.up_proj": {
1581
+ "bits": 6,
1582
+ "group_size": 64
1583
+ },
1584
+ "model.layers.24.input_layernorm": false,
1585
+ "model.layers.24.post_attention_layernorm": false,
1586
+ "model.layers.24.pre_feedforward_layernorm": false,
1587
+ "model.layers.24.post_feedforward_layernorm": false,
1588
+ "model.layers.25.self_attn.q_proj": {
1589
+ "bits": 6,
1590
+ "group_size": 64
1591
+ },
1592
+ "model.layers.25.self_attn.k_proj": {
1593
+ "bits": 6,
1594
+ "group_size": 64
1595
+ },
1596
+ "model.layers.25.self_attn.v_proj": true,
1597
+ "model.layers.25.self_attn.o_proj": true,
1598
+ "model.layers.25.self_attn.q_norm": false,
1599
+ "model.layers.25.self_attn.k_norm": false,
1600
+ "model.layers.25.self_attn.rope": false,
1601
+ "model.layers.25.mlp.gate_proj": true,
1602
+ "model.layers.25.mlp.down_proj": true,
1603
+ "model.layers.25.mlp.up_proj": true,
1604
+ "model.layers.25.input_layernorm": false,
1605
+ "model.layers.25.post_attention_layernorm": false,
1606
+ "model.layers.25.pre_feedforward_layernorm": false,
1607
+ "model.layers.25.post_feedforward_layernorm": false,
1608
+ "model.norm": false,
1609
+ "lm_head": {
1610
+ "bits": 6,
1611
+ "group_size": 64
1612
+ }
1613
+ },
1614
+ "query_pre_attn_scalar": 256,
1615
+ "rms_norm_eps": 1e-06,
1616
+ "rope_local_base_freq": 10000,
1617
+ "rope_scaling": null,
1618
+ "rope_theta": 1000000,
1619
+ "sliding_window": 512,
1620
+ "sliding_window_pattern": 6,
1621
+ "torch_dtype": "bfloat16",
1622
+ "transformers_version": "4.50.0.dev0",
1623
+ "use_cache": true,
1624
+ "vocab_size": 262144
1625
+ }