Qubitium committed on
Commit
614afdb
·
verified ·
1 Parent(s): 2068ec3

Upload folder using huggingface_hub

Browse files
config.json CHANGED
@@ -26,7 +26,7 @@
26
  "bits": 4,
27
  "checkpoint_format": "gptq",
28
  "desc_act": false,
29
- "group_size": 128,
30
  "lm_head": false,
31
  "meta": {
32
  "act_group_aware": true,
 
26
  "bits": 4,
27
  "checkpoint_format": "gptq",
28
  "desc_act": false,
29
+ "group_size": 32,
30
  "lm_head": false,
31
  "meta": {
32
  "act_group_aware": true,
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68185f31dc99b779429b4eef3b3455e500b41ad6cb1a484959b1644787f717b2
3
- size 4287795964
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fc7733046c66beb7e1ef70a4013e7cd6584015eee17cbe59e9a120b8fae75e3
3
+ size 4282264494
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f14a7713787b2535114a10886f7c2f941bd5a30df68f49ac2b3167d1d4c6a97
3
- size 2440158770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91cb510d5927cb9d646fd9b15415193008170716f20b7c0cb63179df88048bde
3
+ size 2923693982
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 6727954734
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00001-of-00002.safetensors",
@@ -209,13 +209,13 @@
209
  "model.layers.14.self_attn.v_proj.qzeros": "model-00001-of-00002.safetensors",
210
  "model.layers.14.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
211
  "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
212
- "model.layers.15.mlp.down_proj.g_idx": "model-00001-of-00002.safetensors",
213
- "model.layers.15.mlp.down_proj.qweight": "model-00001-of-00002.safetensors",
214
- "model.layers.15.mlp.down_proj.qzeros": "model-00001-of-00002.safetensors",
215
- "model.layers.15.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
216
  "model.layers.15.mlp.gate_up_proj.g_idx": "model-00001-of-00002.safetensors",
217
- "model.layers.15.mlp.gate_up_proj.qweight": "model-00001-of-00002.safetensors",
218
- "model.layers.15.mlp.gate_up_proj.qzeros": "model-00001-of-00002.safetensors",
219
  "model.layers.15.mlp.gate_up_proj.scales": "model-00001-of-00002.safetensors",
220
  "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
221
  "model.layers.15.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
@@ -238,34 +238,34 @@
238
  "model.layers.15.self_attn.v_proj.qzeros": "model-00001-of-00002.safetensors",
239
  "model.layers.15.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
240
  "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
241
- "model.layers.16.mlp.down_proj.g_idx": "model-00001-of-00002.safetensors",
242
- "model.layers.16.mlp.down_proj.qweight": "model-00001-of-00002.safetensors",
243
- "model.layers.16.mlp.down_proj.qzeros": "model-00001-of-00002.safetensors",
244
- "model.layers.16.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
245
- "model.layers.16.mlp.gate_up_proj.g_idx": "model-00001-of-00002.safetensors",
246
- "model.layers.16.mlp.gate_up_proj.qweight": "model-00001-of-00002.safetensors",
247
- "model.layers.16.mlp.gate_up_proj.qzeros": "model-00001-of-00002.safetensors",
248
- "model.layers.16.mlp.gate_up_proj.scales": "model-00001-of-00002.safetensors",
249
  "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
250
- "model.layers.16.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
251
- "model.layers.16.self_attn.k_proj.g_idx": "model-00001-of-00002.safetensors",
252
- "model.layers.16.self_attn.k_proj.qweight": "model-00001-of-00002.safetensors",
253
- "model.layers.16.self_attn.k_proj.qzeros": "model-00001-of-00002.safetensors",
254
- "model.layers.16.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
255
- "model.layers.16.self_attn.o_proj.g_idx": "model-00001-of-00002.safetensors",
256
- "model.layers.16.self_attn.o_proj.qweight": "model-00001-of-00002.safetensors",
257
- "model.layers.16.self_attn.o_proj.qzeros": "model-00001-of-00002.safetensors",
258
- "model.layers.16.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
259
- "model.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
260
- "model.layers.16.self_attn.q_proj.g_idx": "model-00001-of-00002.safetensors",
261
- "model.layers.16.self_attn.q_proj.qweight": "model-00001-of-00002.safetensors",
262
- "model.layers.16.self_attn.q_proj.qzeros": "model-00001-of-00002.safetensors",
263
- "model.layers.16.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
264
- "model.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
265
- "model.layers.16.self_attn.v_proj.g_idx": "model-00001-of-00002.safetensors",
266
- "model.layers.16.self_attn.v_proj.qweight": "model-00001-of-00002.safetensors",
267
- "model.layers.16.self_attn.v_proj.qzeros": "model-00001-of-00002.safetensors",
268
- "model.layers.16.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
269
  "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
270
  "model.layers.17.mlp.down_proj.g_idx": "model-00002-of-00002.safetensors",
271
  "model.layers.17.mlp.down_proj.qweight": "model-00002-of-00002.safetensors",
@@ -285,11 +285,11 @@
285
  "model.layers.17.self_attn.o_proj.qweight": "model-00002-of-00002.safetensors",
286
  "model.layers.17.self_attn.o_proj.qzeros": "model-00002-of-00002.safetensors",
287
  "model.layers.17.self_attn.o_proj.scales": "model-00002-of-00002.safetensors",
288
- "model.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
289
- "model.layers.17.self_attn.q_proj.g_idx": "model-00001-of-00002.safetensors",
290
  "model.layers.17.self_attn.q_proj.qweight": "model-00002-of-00002.safetensors",
291
  "model.layers.17.self_attn.q_proj.qzeros": "model-00002-of-00002.safetensors",
292
- "model.layers.17.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
293
  "model.layers.17.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
294
  "model.layers.17.self_attn.v_proj.g_idx": "model-00002-of-00002.safetensors",
295
  "model.layers.17.self_attn.v_proj.qweight": "model-00002-of-00002.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_size": 7205958476
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00001-of-00002.safetensors",
 
209
  "model.layers.14.self_attn.v_proj.qzeros": "model-00001-of-00002.safetensors",
210
  "model.layers.14.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
211
  "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
212
+ "model.layers.15.mlp.down_proj.g_idx": "model-00002-of-00002.safetensors",
213
+ "model.layers.15.mlp.down_proj.qweight": "model-00002-of-00002.safetensors",
214
+ "model.layers.15.mlp.down_proj.qzeros": "model-00002-of-00002.safetensors",
215
+ "model.layers.15.mlp.down_proj.scales": "model-00002-of-00002.safetensors",
216
  "model.layers.15.mlp.gate_up_proj.g_idx": "model-00001-of-00002.safetensors",
217
+ "model.layers.15.mlp.gate_up_proj.qweight": "model-00002-of-00002.safetensors",
218
+ "model.layers.15.mlp.gate_up_proj.qzeros": "model-00002-of-00002.safetensors",
219
  "model.layers.15.mlp.gate_up_proj.scales": "model-00001-of-00002.safetensors",
220
  "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
221
  "model.layers.15.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
 
238
  "model.layers.15.self_attn.v_proj.qzeros": "model-00001-of-00002.safetensors",
239
  "model.layers.15.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
240
  "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
241
+ "model.layers.16.mlp.down_proj.g_idx": "model-00002-of-00002.safetensors",
242
+ "model.layers.16.mlp.down_proj.qweight": "model-00002-of-00002.safetensors",
243
+ "model.layers.16.mlp.down_proj.qzeros": "model-00002-of-00002.safetensors",
244
+ "model.layers.16.mlp.down_proj.scales": "model-00002-of-00002.safetensors",
245
+ "model.layers.16.mlp.gate_up_proj.g_idx": "model-00002-of-00002.safetensors",
246
+ "model.layers.16.mlp.gate_up_proj.qweight": "model-00002-of-00002.safetensors",
247
+ "model.layers.16.mlp.gate_up_proj.qzeros": "model-00002-of-00002.safetensors",
248
+ "model.layers.16.mlp.gate_up_proj.scales": "model-00002-of-00002.safetensors",
249
  "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
250
+ "model.layers.16.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
251
+ "model.layers.16.self_attn.k_proj.g_idx": "model-00002-of-00002.safetensors",
252
+ "model.layers.16.self_attn.k_proj.qweight": "model-00002-of-00002.safetensors",
253
+ "model.layers.16.self_attn.k_proj.qzeros": "model-00002-of-00002.safetensors",
254
+ "model.layers.16.self_attn.k_proj.scales": "model-00002-of-00002.safetensors",
255
+ "model.layers.16.self_attn.o_proj.g_idx": "model-00002-of-00002.safetensors",
256
+ "model.layers.16.self_attn.o_proj.qweight": "model-00002-of-00002.safetensors",
257
+ "model.layers.16.self_attn.o_proj.qzeros": "model-00002-of-00002.safetensors",
258
+ "model.layers.16.self_attn.o_proj.scales": "model-00002-of-00002.safetensors",
259
+ "model.layers.16.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
260
+ "model.layers.16.self_attn.q_proj.g_idx": "model-00002-of-00002.safetensors",
261
+ "model.layers.16.self_attn.q_proj.qweight": "model-00002-of-00002.safetensors",
262
+ "model.layers.16.self_attn.q_proj.qzeros": "model-00002-of-00002.safetensors",
263
+ "model.layers.16.self_attn.q_proj.scales": "model-00002-of-00002.safetensors",
264
+ "model.layers.16.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
265
+ "model.layers.16.self_attn.v_proj.g_idx": "model-00002-of-00002.safetensors",
266
+ "model.layers.16.self_attn.v_proj.qweight": "model-00002-of-00002.safetensors",
267
+ "model.layers.16.self_attn.v_proj.qzeros": "model-00002-of-00002.safetensors",
268
+ "model.layers.16.self_attn.v_proj.scales": "model-00002-of-00002.safetensors",
269
  "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
270
  "model.layers.17.mlp.down_proj.g_idx": "model-00002-of-00002.safetensors",
271
  "model.layers.17.mlp.down_proj.qweight": "model-00002-of-00002.safetensors",
 
285
  "model.layers.17.self_attn.o_proj.qweight": "model-00002-of-00002.safetensors",
286
  "model.layers.17.self_attn.o_proj.qzeros": "model-00002-of-00002.safetensors",
287
  "model.layers.17.self_attn.o_proj.scales": "model-00002-of-00002.safetensors",
288
+ "model.layers.17.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
289
+ "model.layers.17.self_attn.q_proj.g_idx": "model-00002-of-00002.safetensors",
290
  "model.layers.17.self_attn.q_proj.qweight": "model-00002-of-00002.safetensors",
291
  "model.layers.17.self_attn.q_proj.qzeros": "model-00002-of-00002.safetensors",
292
+ "model.layers.17.self_attn.q_proj.scales": "model-00002-of-00002.safetensors",
293
  "model.layers.17.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
294
  "model.layers.17.self_attn.v_proj.g_idx": "model-00002-of-00002.safetensors",
295
  "model.layers.17.self_attn.v_proj.qweight": "model-00002-of-00002.safetensors",
quant_log.csv CHANGED
@@ -1,241 +1,241 @@
1
  layer,module,loss,samples,damp,time
2
- 0,self_attn.k_proj,0.0000003966,0.05000,1.438
3
- 0,self_attn.q_proj,0.0000019802,0.05000,1.594
4
- 0,self_attn.v_proj,0.0000000114,0.05000,1.607
5
- 0,self_attn.o_proj,0.0000000000,0.05000,1.134
6
- 0,mlp.gate_up_proj,0.0000061852,0.05000,1.195
7
- 0,mlp.down_proj,0.0000000032,0.05000,4.054
8
- 1,self_attn.k_proj,0.0000019767,0.05000,1.274
9
- 1,self_attn.v_proj,0.0000001176,0.05000,1.285
10
- 1,self_attn.q_proj,0.0000080634,0.05000,1.340
11
- 1,self_attn.o_proj,0.0000000013,0.05000,1.101
12
- 1,mlp.gate_up_proj,0.0000125150,0.05000,1.157
13
- 1,mlp.down_proj,0.0000000047,0.05000,3.877
14
- 2,self_attn.k_proj,0.0000034906,0.05000,1.173
15
- 2,self_attn.v_proj,0.0000002783,0.05000,1.282
16
- 2,self_attn.q_proj,0.0000170658,0.05000,1.294
17
- 2,self_attn.o_proj,0.0000000021,0.05000,1.114
18
- 2,mlp.gate_up_proj,0.0000202625,0.05000,1.142
19
- 2,mlp.down_proj,0.0000000065,0.05000,3.932
20
- 3,self_attn.k_proj,0.0000060004,0.05000,1.205
21
- 3,self_attn.v_proj,0.0000003360,0.05000,1.215
22
- 3,self_attn.q_proj,0.0000296674,0.05000,1.227
23
- 3,self_attn.o_proj,0.0000000026,0.05000,1.052
24
- 3,mlp.gate_up_proj,0.0000340755,0.05000,1.096
25
- 3,mlp.down_proj,0.0000000099,0.05000,4.182
26
- 4,self_attn.k_proj,0.0000037749,0.05000,1.160
27
- 4,self_attn.v_proj,0.0000003488,0.05000,1.216
28
- 4,self_attn.q_proj,0.0000199789,0.05000,1.228
29
- 4,self_attn.o_proj,0.0000000077,0.05000,1.183
30
- 4,mlp.gate_up_proj,0.0000506752,0.05000,1.135
31
- 4,mlp.down_proj,0.0000004453,0.05000,4.171
32
- 5,self_attn.q_proj,0.0000297330,0.05000,1.136
33
- 5,self_attn.v_proj,0.0000006601,0.05000,1.138
34
- 5,self_attn.k_proj,0.0000051065,0.05000,1.196
35
- 5,self_attn.o_proj,0.0000000107,0.05000,1.112
36
- 5,mlp.gate_up_proj,0.0000613599,0.05000,1.132
37
- 5,mlp.down_proj,0.0000000300,0.05000,3.957
38
- 6,self_attn.v_proj,0.0000008097,0.05000,1.264
39
- 6,self_attn.q_proj,0.0000462938,0.05000,1.274
40
- 6,self_attn.k_proj,0.0000072560,0.05000,1.278
41
- 6,self_attn.o_proj,0.0000000130,0.05000,1.185
42
- 6,mlp.gate_up_proj,0.0000733845,0.05000,1.165
43
- 6,mlp.down_proj,0.0000000425,0.05000,4.057
44
- 7,self_attn.q_proj,0.0000374955,0.05000,1.290
45
- 7,self_attn.v_proj,0.0000011266,0.05000,1.346
46
- 7,self_attn.k_proj,0.0000050541,0.05000,1.373
47
- 7,self_attn.o_proj,0.0000000191,0.05000,1.118
48
- 7,mlp.gate_up_proj,0.0000839490,0.05000,1.217
49
- 7,mlp.down_proj,0.0000000710,0.05000,3.884
50
- 8,self_attn.k_proj,0.0000065933,0.05000,1.153
51
- 8,self_attn.q_proj,0.0000392850,0.05000,1.233
52
- 8,self_attn.v_proj,0.0000009417,0.05000,1.272
53
- 8,self_attn.o_proj,0.0000000265,0.05000,1.043
54
- 8,mlp.gate_up_proj,0.0000902727,0.05000,1.170
55
- 8,mlp.down_proj,0.0000000827,0.05000,3.972
56
- 9,self_attn.q_proj,0.0000515417,0.05000,1.276
57
- 9,self_attn.v_proj,0.0000014904,0.05000,1.281
58
- 9,self_attn.k_proj,0.0000065620,0.05000,1.292
59
- 9,self_attn.o_proj,0.0000000438,0.05000,1.081
60
- 9,mlp.gate_up_proj,0.0001111705,0.05000,1.176
61
- 9,mlp.down_proj,0.0000000930,0.05000,3.890
62
- 10,self_attn.q_proj,0.0000452197,0.05000,1.213
63
- 10,self_attn.v_proj,0.0000016617,0.05000,1.214
64
- 10,self_attn.k_proj,0.0000061107,0.05000,1.265
65
- 10,self_attn.o_proj,0.0000000349,0.05000,1.028
66
- 10,mlp.gate_up_proj,0.0001140903,0.05000,1.132
67
- 10,mlp.down_proj,0.0000000927,0.05000,3.873
68
- 11,self_attn.q_proj,0.0000451923,0.05000,1.160
69
- 11,self_attn.v_proj,0.0000018009,0.05000,1.171
70
- 11,self_attn.k_proj,0.0000063196,0.05000,1.178
71
- 11,self_attn.o_proj,0.0000000383,0.05000,1.018
72
- 11,mlp.gate_up_proj,0.0001226848,0.05000,1.123
73
- 11,mlp.down_proj,0.0000000988,0.05000,3.885
74
- 12,self_attn.k_proj,0.0000087359,0.05000,1.131
75
- 12,self_attn.v_proj,0.0000014319,0.05000,1.171
76
- 12,self_attn.q_proj,0.0000527561,0.05000,1.221
77
- 12,self_attn.o_proj,0.0000000496,0.05000,1.079
78
- 12,mlp.gate_up_proj,0.0001314741,0.05000,1.134
79
- 12,mlp.down_proj,0.0000001164,0.05000,4.036
80
- 13,self_attn.k_proj,0.0000070793,0.05000,1.070
81
- 13,self_attn.q_proj,0.0000542174,0.05000,1.129
82
- 13,self_attn.v_proj,0.0000025064,0.05000,1.166
83
- 13,self_attn.o_proj,0.0000000436,0.05000,1.105
84
- 13,mlp.gate_up_proj,0.0001388454,0.05000,1.159
85
- 13,mlp.down_proj,0.0000001379,0.05000,3.938
86
- 14,self_attn.k_proj,0.0000079858,0.05000,1.189
87
- 14,self_attn.v_proj,0.0000017932,0.05000,1.216
88
- 14,self_attn.q_proj,0.0000557084,0.05000,1.332
89
- 14,self_attn.o_proj,0.0000000553,0.05000,1.028
90
- 14,mlp.gate_up_proj,0.0001397226,0.05000,1.144
91
- 14,mlp.down_proj,0.0000001645,0.05000,3.879
92
- 15,self_attn.k_proj,0.0000091874,0.05000,1.207
93
- 15,self_attn.q_proj,0.0000600741,0.05000,1.262
94
- 15,self_attn.v_proj,0.0000022552,0.05000,1.273
95
- 15,self_attn.o_proj,0.0000000711,0.05000,1.028
96
- 15,mlp.gate_up_proj,0.0001469981,0.05000,1.183
97
- 15,mlp.down_proj,0.0000002013,0.05000,3.935
98
- 16,self_attn.k_proj,0.0000087135,0.05000,1.173
99
- 16,self_attn.v_proj,0.0000030199,0.05000,1.207
100
- 16,self_attn.q_proj,0.0000658588,0.05000,1.295
101
- 16,self_attn.o_proj,0.0000000821,0.05000,1.064
102
- 16,mlp.gate_up_proj,0.0001525509,0.05000,1.108
103
- 16,mlp.down_proj,0.0000002533,0.05000,3.940
104
- 17,self_attn.k_proj,0.0000071499,0.05000,1.108
105
- 17,self_attn.v_proj,0.0000027810,0.05000,1.159
106
- 17,self_attn.q_proj,0.0000557307,0.05000,1.164
107
- 17,self_attn.o_proj,0.0000000981,0.05000,1.075
108
- 17,mlp.gate_up_proj,0.0001636252,0.05000,1.144
109
- 17,mlp.down_proj,0.0000003070,0.05000,3.912
110
- 18,self_attn.k_proj,0.0000060159,0.05000,1.092
111
- 18,self_attn.v_proj,0.0000045565,0.05000,1.095
112
- 18,self_attn.q_proj,0.0001490090,0.05000,1.116
113
- 18,self_attn.o_proj,0.0000001456,0.05000,1.028
114
- 18,mlp.gate_up_proj,0.0001617964,0.05000,1.183
115
- 18,mlp.down_proj,0.0000003610,0.05000,4.033
116
- 19,self_attn.v_proj,0.0000042745,0.05000,1.119
117
- 19,self_attn.k_proj,0.0000091773,0.05000,1.128
118
- 19,self_attn.q_proj,0.0000698117,0.05000,1.133
119
- 19,self_attn.o_proj,0.0000001922,0.05000,1.051
120
- 19,mlp.gate_up_proj,0.0001840929,0.05000,1.114
121
- 19,mlp.down_proj,0.0000004435,0.05000,3.909
122
- 20,self_attn.k_proj,0.0000073916,0.05000,1.198
123
- 20,self_attn.q_proj,0.0000671984,0.05000,1.198
124
- 20,self_attn.v_proj,0.0000052818,0.05000,1.209
125
- 20,self_attn.o_proj,0.0000001802,0.05000,1.016
126
- 20,mlp.gate_up_proj,0.0002143455,0.05000,1.086
127
- 20,mlp.down_proj,0.0000006132,0.05000,4.101
128
- 21,self_attn.k_proj,0.0000076497,0.05000,1.101
129
- 21,self_attn.v_proj,0.0000061887,0.05000,1.183
130
- 21,self_attn.q_proj,0.0000699514,0.05000,1.195
131
- 21,self_attn.o_proj,0.0000002686,0.05000,1.090
132
- 21,mlp.gate_up_proj,0.0002570642,0.05000,1.122
133
- 21,mlp.down_proj,0.0000009058,0.05000,3.896
134
- 22,self_attn.v_proj,0.0000077014,0.05000,1.110
135
- 22,self_attn.q_proj,0.0001222432,0.05000,1.135
136
- 22,self_attn.k_proj,0.0000074267,0.05000,1.178
137
- 22,self_attn.o_proj,0.0000003587,0.05000,1.009
138
- 22,mlp.gate_up_proj,0.0002880060,0.05000,1.162
139
- 22,mlp.down_proj,0.0000013639,0.05000,3.980
140
- 23,self_attn.v_proj,0.0000117320,0.05000,1.145
141
- 23,self_attn.k_proj,0.0000079260,0.05000,1.178
142
- 23,self_attn.q_proj,0.0000876769,0.05000,1.222
143
- 23,self_attn.o_proj,0.0000005188,0.05000,1.157
144
- 23,mlp.gate_up_proj,0.0003447045,0.05000,1.187
145
- 23,mlp.down_proj,0.0000017473,0.05000,4.074
146
- 24,self_attn.q_proj,0.0001026236,0.05000,1.199
147
- 24,self_attn.v_proj,0.0000193858,0.05000,1.221
148
- 24,self_attn.k_proj,0.0000087059,0.05000,1.228
149
- 24,self_attn.o_proj,0.0000004895,0.05000,1.108
150
- 24,mlp.gate_up_proj,0.0004117376,0.05000,1.135
151
- 24,mlp.down_proj,0.0000025886,0.05000,3.998
152
- 25,self_attn.k_proj,0.0000089159,0.05000,1.112
153
- 25,self_attn.q_proj,0.0000940188,0.05000,1.131
154
- 25,self_attn.v_proj,0.0000155966,0.05000,1.188
155
- 25,self_attn.o_proj,0.0000004938,0.05000,1.048
156
- 25,mlp.gate_up_proj,0.0004781412,0.05000,1.095
157
- 25,mlp.down_proj,0.0000038761,0.05000,3.927
158
- 26,self_attn.k_proj,0.0000087348,0.05000,1.139
159
- 26,self_attn.v_proj,0.0000259328,0.05000,1.175
160
- 26,self_attn.q_proj,0.0001108469,0.05000,1.246
161
- 26,self_attn.o_proj,0.0000007847,0.05000,1.055
162
- 26,mlp.gate_up_proj,0.0005478908,0.05000,1.073
163
- 26,mlp.down_proj,0.0000044186,0.05000,3.839
164
- 27,self_attn.k_proj,0.0000072219,0.05000,1.099
165
- 27,self_attn.v_proj,0.0000226286,0.05000,1.135
166
- 27,self_attn.q_proj,0.0001000139,0.05000,1.166
167
- 27,self_attn.o_proj,0.0000008341,0.05000,1.163
168
- 27,mlp.gate_up_proj,0.0005620720,0.05000,1.135
169
- 27,mlp.down_proj,0.0000048297,0.05000,3.856
170
- 28,self_attn.v_proj,0.0000370639,0.05000,1.184
171
- 28,self_attn.k_proj,0.0000085196,0.05000,1.186
172
- 28,self_attn.q_proj,0.0001152836,0.05000,1.225
173
- 28,self_attn.o_proj,0.0000009589,0.05000,1.083
174
- 28,mlp.gate_up_proj,0.0006251217,0.05000,1.190
175
- 28,mlp.down_proj,0.0000057185,0.05000,3.868
176
- 29,self_attn.q_proj,0.0001401858,0.05000,1.209
177
- 29,self_attn.v_proj,0.0000561455,0.05000,1.240
178
- 29,self_attn.k_proj,0.0000101474,0.05000,1.244
179
- 29,self_attn.o_proj,0.0000017324,0.05000,1.182
180
- 29,mlp.gate_up_proj,0.0007084473,0.05000,1.111
181
- 29,mlp.down_proj,0.0000069938,0.05000,3.892
182
- 30,self_attn.v_proj,0.0000279494,0.05000,1.254
183
- 30,self_attn.q_proj,0.0001222076,0.05000,1.275
184
- 30,self_attn.k_proj,0.0000098990,0.05000,1.279
185
- 30,self_attn.o_proj,0.0000007452,0.05000,1.017
186
- 30,mlp.gate_up_proj,0.0007461067,0.05000,1.090
187
- 30,mlp.down_proj,0.0000082241,0.05000,4.023
188
- 31,self_attn.q_proj,0.0001208646,0.05000,1.169
189
- 31,self_attn.k_proj,0.0000094290,0.05000,1.184
190
- 31,self_attn.v_proj,0.0000466435,0.05000,1.197
191
- 31,self_attn.o_proj,0.0000009983,0.05000,1.059
192
- 31,mlp.gate_up_proj,0.0007930596,0.05000,1.164
193
- 31,mlp.down_proj,0.0000089640,0.05000,3.871
194
- 32,self_attn.k_proj,0.0000088719,0.05000,1.089
195
- 32,self_attn.v_proj,0.0000963343,0.05000,1.207
196
- 32,self_attn.q_proj,0.0001329254,0.05000,1.207
197
- 32,self_attn.o_proj,0.0000019815,0.05000,1.023
198
- 32,mlp.gate_up_proj,0.0008661926,0.05000,1.140
199
- 32,mlp.down_proj,0.0000095839,0.05000,3.852
200
- 33,self_attn.v_proj,0.0001405953,0.05000,1.212
201
- 33,self_attn.k_proj,0.0000092522,0.05000,1.225
202
- 33,self_attn.q_proj,0.0001401690,0.05000,1.239
203
- 33,self_attn.o_proj,0.0000019085,0.05000,1.089
204
- 33,mlp.gate_up_proj,0.0009390484,0.05000,1.116
205
- 33,mlp.down_proj,0.0000111966,0.05000,3.913
206
- 34,self_attn.k_proj,0.0000103305,0.05000,1.114
207
- 34,self_attn.q_proj,0.0001466204,0.05000,1.159
208
- 34,self_attn.v_proj,0.0001133597,0.05000,1.185
209
- 34,self_attn.o_proj,0.0000014576,0.05000,1.126
210
- 34,mlp.gate_up_proj,0.0010087134,0.05000,1.172
211
- 34,mlp.down_proj,0.0000149076,0.05000,3.995
212
- 35,self_attn.k_proj,0.0000087772,0.05000,1.113
213
- 35,self_attn.q_proj,0.0001403766,0.05000,1.152
214
- 35,self_attn.v_proj,0.0001295132,0.05000,1.156
215
- 35,self_attn.o_proj,0.0000030535,0.05000,1.042
216
- 35,mlp.gate_up_proj,0.0011452081,0.05000,1.240
217
- 35,mlp.down_proj,0.0000185383,0.05000,4.090
218
- 36,self_attn.q_proj,0.0001428431,0.05000,1.180
219
- 36,self_attn.k_proj,0.0000099150,0.05000,1.196
220
- 36,self_attn.v_proj,0.0000602161,0.05000,1.287
221
- 36,self_attn.o_proj,0.0000034438,0.05000,1.050
222
- 36,mlp.gate_up_proj,0.0011308273,0.05000,1.124
223
- 36,mlp.down_proj,0.0000222443,0.05000,3.974
224
- 37,self_attn.k_proj,0.0000076602,0.05000,1.213
225
- 37,self_attn.q_proj,0.0001303909,0.05000,1.220
226
- 37,self_attn.v_proj,0.0000581156,0.05000,1.222
227
- 37,self_attn.o_proj,0.0000040153,0.05000,1.034
228
- 37,mlp.gate_up_proj,0.0011603485,0.05000,1.187
229
- 37,mlp.down_proj,0.0000326401,0.05000,3.968
230
- 38,self_attn.v_proj,0.0000411816,0.05000,1.114
231
- 38,self_attn.q_proj,0.0000913141,0.05000,1.177
232
- 38,self_attn.k_proj,0.0000054355,0.05000,1.203
233
- 38,self_attn.o_proj,0.0000019167,0.05000,1.043
234
- 38,mlp.gate_up_proj,0.0019318110,0.05000,1.182
235
- 38,mlp.down_proj,0.0981100216,0.05000,3.931
236
- 39,self_attn.q_proj,0.0001141409,0.05000,1.157
237
- 39,self_attn.v_proj,0.0000450612,0.05000,1.173
238
- 39,self_attn.k_proj,0.0000136546,0.05000,1.179
239
- 39,self_attn.o_proj,0.0000032148,0.05000,1.127
240
- 39,mlp.gate_up_proj,0.0033458435,0.05000,1.130
241
- 39,mlp.down_proj,0.0001901832,0.05000,3.925
 
1
  layer,module,loss,samples,damp,time
2
+ 0,self_attn.k_proj,0.0000002922,0.05000,1.374
3
+ 0,self_attn.v_proj,0.0000000078,0.05000,1.877
4
+ 0,self_attn.q_proj,0.0000013902,0.05000,1.894
5
+ 0,self_attn.o_proj,0.0000000000,0.05000,1.079
6
+ 0,mlp.gate_up_proj,0.0000039082,0.05000,1.145
7
+ 0,mlp.down_proj,0.0000000022,0.05000,3.957
8
+ 1,self_attn.k_proj,0.0000012696,0.05000,1.122
9
+ 1,self_attn.q_proj,0.0000052420,0.05000,1.642
10
+ 1,self_attn.v_proj,0.0000000755,0.05000,1.651
11
+ 1,self_attn.o_proj,0.0000000010,0.05000,1.078
12
+ 1,mlp.gate_up_proj,0.0000090307,0.05000,1.138
13
+ 1,mlp.down_proj,0.0000000033,0.05000,3.974
14
+ 2,self_attn.k_proj,0.0000022448,0.05000,1.120
15
+ 2,self_attn.v_proj,0.0000001895,0.05000,1.844
16
+ 2,self_attn.q_proj,0.0000115248,0.05000,1.867
17
+ 2,self_attn.o_proj,0.0000000015,0.05000,1.097
18
+ 2,mlp.gate_up_proj,0.0000145250,0.05000,1.131
19
+ 2,mlp.down_proj,0.0000000044,0.05000,4.032
20
+ 3,self_attn.k_proj,0.0000041788,0.05000,1.181
21
+ 3,self_attn.v_proj,0.0000002238,0.05000,1.954
22
+ 3,self_attn.q_proj,0.0000207401,0.05000,2.006
23
+ 3,self_attn.o_proj,0.0000000020,0.05000,1.164
24
+ 3,mlp.gate_up_proj,0.0000242088,0.05000,1.087
25
+ 3,mlp.down_proj,0.0000000069,0.05000,4.102
26
+ 4,self_attn.k_proj,0.0000025550,0.05000,1.137
27
+ 4,self_attn.v_proj,0.0000002392,0.05000,1.907
28
+ 4,self_attn.q_proj,0.0000136254,0.05000,1.937
29
+ 4,self_attn.o_proj,0.0000000054,0.05000,1.106
30
+ 4,mlp.gate_up_proj,0.0000356180,0.05000,1.139
31
+ 4,mlp.down_proj,0.0000003168,0.05000,3.943
32
+ 5,self_attn.k_proj,0.0000033486,0.05000,1.221
33
+ 5,self_attn.q_proj,0.0000195772,0.05000,1.896
34
+ 5,self_attn.v_proj,0.0000004364,0.05000,1.944
35
+ 5,self_attn.o_proj,0.0000000074,0.05000,1.070
36
+ 5,mlp.gate_up_proj,0.0000432452,0.05000,1.177
37
+ 5,mlp.down_proj,0.0000000208,0.05000,3.918
38
+ 6,self_attn.k_proj,0.0000047738,0.05000,1.113
39
+ 6,self_attn.v_proj,0.0000005422,0.05000,1.863
40
+ 6,self_attn.q_proj,0.0000307641,0.05000,1.887
41
+ 6,self_attn.o_proj,0.0000000094,0.05000,1.128
42
+ 6,mlp.gate_up_proj,0.0000519832,0.05000,1.106
43
+ 6,mlp.down_proj,0.0000000294,0.05000,4.009
44
+ 7,self_attn.k_proj,0.0000035275,0.05000,1.169
45
+ 7,self_attn.q_proj,0.0000262622,0.05000,1.715
46
+ 7,self_attn.v_proj,0.0000007889,0.05000,1.724
47
+ 7,self_attn.o_proj,0.0000000140,0.05000,1.027
48
+ 7,mlp.gate_up_proj,0.0000592456,0.05000,1.096
49
+ 7,mlp.down_proj,0.0000000486,0.05000,3.976
50
+ 8,self_attn.k_proj,0.0000042726,0.05000,1.165
51
+ 8,self_attn.v_proj,0.0000006522,0.05000,1.730
52
+ 8,self_attn.q_proj,0.0000268881,0.05000,1.749
53
+ 8,self_attn.o_proj,0.0000000190,0.05000,1.026
54
+ 8,mlp.gate_up_proj,0.0000640788,0.05000,1.062
55
+ 8,mlp.down_proj,0.0000000562,0.05000,3.830
56
+ 9,self_attn.k_proj,0.0000044805,0.05000,1.092
57
+ 9,self_attn.v_proj,0.0000010438,0.05000,1.710
58
+ 9,self_attn.q_proj,0.0000359093,0.05000,1.731
59
+ 9,self_attn.o_proj,0.0000000310,0.05000,1.021
60
+ 9,mlp.gate_up_proj,0.0000793702,0.05000,1.124
61
+ 9,mlp.down_proj,0.0000000633,0.05000,3.819
62
+ 10,self_attn.k_proj,0.0000042289,0.05000,1.117
63
+ 10,self_attn.v_proj,0.0000011532,0.05000,2.009
64
+ 10,self_attn.q_proj,0.0000316913,0.05000,2.014
65
+ 10,self_attn.o_proj,0.0000000250,0.05000,1.039
66
+ 10,mlp.gate_up_proj,0.0000812482,0.05000,1.073
67
+ 10,mlp.down_proj,0.0000000638,0.05000,3.959
68
+ 11,self_attn.k_proj,0.0000043707,0.05000,1.156
69
+ 11,self_attn.v_proj,0.0000012567,0.05000,1.849
70
+ 11,self_attn.q_proj,0.0000318168,0.05000,1.857
71
+ 11,self_attn.o_proj,0.0000000271,0.05000,1.051
72
+ 11,mlp.gate_up_proj,0.0000871886,0.05000,1.064
73
+ 11,mlp.down_proj,0.0000000682,0.05000,3.973
74
+ 12,self_attn.k_proj,0.0000058181,0.05000,1.157
75
+ 12,self_attn.q_proj,0.0000368757,0.05000,1.752
76
+ 12,self_attn.v_proj,0.0000010061,0.05000,1.752
77
+ 12,self_attn.o_proj,0.0000000340,0.05000,1.064
78
+ 12,mlp.gate_up_proj,0.0000935123,0.05000,1.089
79
+ 12,mlp.down_proj,0.0000000808,0.05000,3.845
80
+ 13,self_attn.k_proj,0.0000048201,0.05000,1.215
81
+ 13,self_attn.v_proj,0.0000017309,0.05000,1.838
82
+ 13,self_attn.q_proj,0.0000376639,0.05000,1.851
83
+ 13,self_attn.o_proj,0.0000000299,0.05000,1.044
84
+ 13,mlp.gate_up_proj,0.0000980107,0.05000,1.168
85
+ 13,mlp.down_proj,0.0000000953,0.05000,3.863
86
+ 14,self_attn.k_proj,0.0000055619,0.05000,1.140
87
+ 14,self_attn.v_proj,0.0000012475,0.05000,1.757
88
+ 14,self_attn.q_proj,0.0000390916,0.05000,1.772
89
+ 14,self_attn.o_proj,0.0000000384,0.05000,1.100
90
+ 14,mlp.gate_up_proj,0.0000979989,0.05000,1.138
91
+ 14,mlp.down_proj,0.0000001124,0.05000,3.875
92
+ 15,self_attn.k_proj,0.0000062197,0.05000,1.168
93
+ 15,self_attn.v_proj,0.0000015974,0.05000,1.752
94
+ 15,self_attn.q_proj,0.0000422419,0.05000,1.770
95
+ 15,self_attn.o_proj,0.0000000473,0.05000,1.035
96
+ 15,mlp.gate_up_proj,0.0001028249,0.05000,1.133
97
+ 15,mlp.down_proj,0.0000001380,0.05000,3.842
98
+ 16,self_attn.k_proj,0.0000059852,0.05000,1.133
99
+ 16,self_attn.q_proj,0.0000459906,0.05000,1.873
100
+ 16,self_attn.v_proj,0.0000020952,0.05000,1.879
101
+ 16,self_attn.o_proj,0.0000000582,0.05000,1.079
102
+ 16,mlp.gate_up_proj,0.0001066342,0.05000,1.075
103
+ 16,mlp.down_proj,0.0000001729,0.05000,3.851
104
+ 17,self_attn.k_proj,0.0000048882,0.05000,1.104
105
+ 17,self_attn.v_proj,0.0000019357,0.05000,1.857
106
+ 17,self_attn.q_proj,0.0000389605,0.05000,1.901
107
+ 17,self_attn.o_proj,0.0000000714,0.05000,1.078
108
+ 17,mlp.gate_up_proj,0.0001144119,0.05000,1.132
109
+ 17,mlp.down_proj,0.0000002098,0.05000,3.790
110
+ 18,self_attn.k_proj,0.0000041478,0.05000,1.127
111
+ 18,self_attn.v_proj,0.0000031805,0.05000,1.769
112
+ 18,self_attn.q_proj,0.0000957768,0.05000,1.781
113
+ 18,self_attn.o_proj,0.0000001034,0.05000,1.075
114
+ 18,mlp.gate_up_proj,0.0001127454,0.05000,1.076
115
+ 18,mlp.down_proj,0.0000002447,0.05000,3.794
116
+ 19,self_attn.k_proj,0.0000061308,0.05000,1.154
117
+ 19,self_attn.q_proj,0.0000491331,0.05000,2.046
118
+ 19,self_attn.v_proj,0.0000030159,0.05000,2.120
119
+ 19,self_attn.o_proj,0.0000001317,0.05000,1.042
120
+ 19,mlp.gate_up_proj,0.0001274382,0.05000,1.156
121
+ 19,mlp.down_proj,0.0000003014,0.05000,3.921
122
+ 20,self_attn.k_proj,0.0000051466,0.05000,1.144
123
+ 20,self_attn.v_proj,0.0000037440,0.05000,2.055
124
+ 20,self_attn.q_proj,0.0000471429,0.05000,2.091
125
+ 20,self_attn.o_proj,0.0000001258,0.05000,1.140
126
+ 20,mlp.gate_up_proj,0.0001480629,0.05000,1.182
127
+ 20,mlp.down_proj,0.0000004168,0.05000,3.869
128
+ 21,self_attn.k_proj,0.0000051177,0.05000,1.334
129
+ 21,self_attn.v_proj,0.0000043570,0.05000,2.250
130
+ 21,self_attn.q_proj,0.0000487124,0.05000,2.343
131
+ 21,self_attn.o_proj,0.0000001870,0.05000,1.147
132
+ 21,mlp.gate_up_proj,0.0001766828,0.05000,1.209
133
+ 21,mlp.down_proj,0.0000006147,0.05000,4.178
134
+ 22,self_attn.k_proj,0.0000051113,0.05000,1.275
135
+ 22,self_attn.q_proj,0.0000819439,0.05000,2.239
136
+ 22,self_attn.v_proj,0.0000054272,0.05000,2.240
137
+ 22,self_attn.o_proj,0.0000002462,0.05000,1.177
138
+ 22,mlp.gate_up_proj,0.0001973738,0.05000,1.153
139
+ 22,mlp.down_proj,0.0000009183,0.05000,4.256
140
+ 23,self_attn.k_proj,0.0000053470,0.05000,1.189
141
+ 23,self_attn.q_proj,0.0000609132,0.05000,2.324
142
+ 23,self_attn.v_proj,0.0000080116,0.05000,2.430
143
+ 23,self_attn.o_proj,0.0000003616,0.05000,1.197
144
+ 23,mlp.gate_up_proj,0.0002378243,0.05000,1.247
145
+ 23,mlp.down_proj,0.0000011878,0.05000,4.577
146
+ 24,self_attn.k_proj,0.0000059234,0.05000,1.537
147
+ 24,self_attn.v_proj,0.0000133390,0.05000,2.142
148
+ 24,self_attn.q_proj,0.0000714417,0.05000,2.189
149
+ 24,self_attn.o_proj,0.0000003454,0.05000,1.091
150
+ 24,mlp.gate_up_proj,0.0002860948,0.05000,1.226
151
+ 24,mlp.down_proj,0.0000017579,0.05000,4.429
152
+ 25,self_attn.k_proj,0.0000059778,0.05000,1.289
153
+ 25,self_attn.v_proj,0.0000109253,0.05000,2.258
154
+ 25,self_attn.q_proj,0.0000656750,0.05000,2.305
155
+ 25,self_attn.o_proj,0.0000003434,0.05000,1.055
156
+ 25,mlp.gate_up_proj,0.0003338020,0.05000,1.211
157
+ 25,mlp.down_proj,0.0000026202,0.05000,4.044
158
+ 26,self_attn.k_proj,0.0000059038,0.05000,1.359
159
+ 26,self_attn.q_proj,0.0000755639,0.05000,2.183
160
+ 26,self_attn.v_proj,0.0000171953,0.05000,2.198
161
+ 26,self_attn.o_proj,0.0000005517,0.05000,1.207
162
+ 26,mlp.gate_up_proj,0.0003860486,0.05000,1.339
163
+ 26,mlp.down_proj,0.0000030165,0.05000,3.993
164
+ 27,self_attn.k_proj,0.0000050343,0.05000,1.366
165
+ 27,self_attn.v_proj,0.0000149522,0.05000,1.941
166
+ 27,self_attn.q_proj,0.0000688656,0.05000,1.996
167
+ 27,self_attn.o_proj,0.0000006007,0.05000,1.172
168
+ 27,mlp.gate_up_proj,0.0003946776,0.05000,1.228
169
+ 27,mlp.down_proj,0.0000032734,0.05000,4.176
170
+ 28,self_attn.k_proj,0.0000058891,0.05000,1.336
171
+ 28,self_attn.v_proj,0.0000237423,0.05000,2.540
172
+ 28,self_attn.q_proj,0.0000784953,0.05000,2.574
173
+ 28,self_attn.o_proj,0.0000006531,0.05000,1.148
174
+ 28,mlp.gate_up_proj,0.0004399898,0.05000,1.271
175
+ 28,mlp.down_proj,0.0000038930,0.05000,4.459
176
+ 29,self_attn.k_proj,0.0000068672,0.05000,1.296
177
+ 29,self_attn.q_proj,0.0000941400,0.05000,1.899
178
+ 29,self_attn.v_proj,0.0000369100,0.05000,1.931
179
+ 29,self_attn.o_proj,0.0000012668,0.05000,1.235
180
+ 29,mlp.gate_up_proj,0.0005033797,0.05000,1.269
181
+ 29,mlp.down_proj,0.0000048307,0.05000,4.170
182
+ 30,self_attn.k_proj,0.0000067856,0.05000,1.263
183
+ 30,self_attn.v_proj,0.0000197323,0.05000,2.020
184
+ 30,self_attn.q_proj,0.0000859262,0.05000,2.033
185
+ 30,self_attn.o_proj,0.0000005434,0.05000,1.145
186
+ 30,mlp.gate_up_proj,0.0005280900,0.05000,1.138
187
+ 30,mlp.down_proj,0.0000056601,0.05000,4.186
188
+ 31,self_attn.k_proj,0.0000063339,0.05000,1.287
189
+ 31,self_attn.v_proj,0.0000298222,0.05000,2.549
190
+ 31,self_attn.q_proj,0.0000827375,0.05000,2.575
191
+ 31,self_attn.o_proj,0.0000007528,0.05000,1.104
192
+ 31,mlp.gate_up_proj,0.0005607144,0.05000,1.332
193
+ 31,mlp.down_proj,0.0000061913,0.05000,4.299
194
+ 32,self_attn.k_proj,0.0000059193,0.05000,1.232
195
+ 32,self_attn.q_proj,0.0000891511,0.05000,2.036
196
+ 32,self_attn.v_proj,0.0000645760,0.05000,2.066
197
+ 32,self_attn.o_proj,0.0000014203,0.05000,1.163
198
+ 32,mlp.gate_up_proj,0.0006130429,0.05000,1.157
199
+ 32,mlp.down_proj,0.0000066158,0.05000,4.178
200
+ 33,self_attn.k_proj,0.0000061163,0.05000,1.194
201
+ 33,self_attn.v_proj,0.0000938030,0.05000,2.129
202
+ 33,self_attn.q_proj,0.0000936246,0.05000,2.147
203
+ 33,self_attn.o_proj,0.0000013597,0.05000,1.127
204
+ 33,mlp.gate_up_proj,0.0006655953,0.05000,1.274
205
+ 33,mlp.down_proj,0.0000077043,0.05000,4.179
206
+ 34,self_attn.k_proj,0.0000069299,0.05000,1.297
207
+ 34,self_attn.v_proj,0.0000751168,0.05000,2.122
208
+ 34,self_attn.q_proj,0.0000990393,0.05000,2.190
209
+ 34,self_attn.o_proj,0.0000010204,0.05000,1.204
210
+ 34,mlp.gate_up_proj,0.0007160597,0.05000,1.200
211
+ 34,mlp.down_proj,0.0000102605,0.05000,4.197
212
+ 35,self_attn.k_proj,0.0000056465,0.05000,1.326
213
+ 35,self_attn.v_proj,0.0000821747,0.05000,2.620
214
+ 35,self_attn.q_proj,0.0000964586,0.05000,2.622
215
+ 35,self_attn.o_proj,0.0000022052,0.05000,1.134
216
+ 35,mlp.gate_up_proj,0.0008116817,0.05000,1.192
217
+ 35,mlp.down_proj,0.0000126651,0.05000,4.387
218
+ 36,self_attn.k_proj,0.0000062332,0.05000,1.266
219
+ 36,self_attn.v_proj,0.0000427911,0.05000,2.210
220
+ 36,self_attn.q_proj,0.0000972865,0.05000,2.224
221
+ 36,self_attn.o_proj,0.0000023794,0.05000,1.163
222
+ 36,mlp.gate_up_proj,0.0008006807,0.05000,1.168
223
+ 36,mlp.down_proj,0.0000151278,0.05000,4.207
224
+ 37,self_attn.k_proj,0.0000052548,0.05000,1.194
225
+ 37,self_attn.v_proj,0.0000413565,0.05000,2.030
226
+ 37,self_attn.q_proj,0.0000888177,0.05000,2.048
227
+ 37,self_attn.o_proj,0.0000029221,0.05000,1.173
228
+ 37,mlp.gate_up_proj,0.0008157736,0.05000,1.281
229
+ 37,mlp.down_proj,0.0000219872,0.05000,4.228
230
+ 38,self_attn.k_proj,0.0000036995,0.05000,1.200
231
+ 38,self_attn.v_proj,0.0000287794,0.05000,2.309
232
+ 38,self_attn.q_proj,0.0000631295,0.05000,2.314
233
+ 38,self_attn.o_proj,0.0000013509,0.05000,1.189
234
+ 38,mlp.gate_up_proj,0.0013094005,0.05000,1.219
235
+ 38,mlp.down_proj,0.1816714025,0.05000,4.069
236
+ 39,self_attn.k_proj,0.0000088539,0.05000,1.275
237
+ 39,self_attn.v_proj,0.0000303753,0.05000,2.114
238
+ 39,self_attn.q_proj,0.0000798257,0.05000,2.126
239
+ 39,self_attn.o_proj,0.0000023493,0.05000,1.164
240
+ 39,mlp.gate_up_proj,0.0023529960,0.05000,1.291
241
+ 39,mlp.down_proj,0.0001214337,0.05000,4.208
quantize_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "bits": 4,
3
- "group_size": 128,
4
  "desc_act": false,
5
  "sym": true,
6
  "lm_head": false,
 
1
  {
2
  "bits": 4,
3
+ "group_size": 32,
4
  "desc_act": false,
5
  "sym": true,
6
  "lm_head": false,