Update config.json
config.json  +336 -336
@@ -31,7 +31,7 @@
|
|
31 |
"model.layers.0.mlp.down_proj": {
|
32 |
"bias": null,
|
33 |
"enable_norm": true,
|
34 |
-
"enable_perm":
|
35 |
"group_num": 1,
|
36 |
"group_size": 13824,
|
37 |
"in_features": 13824,
|
@@ -55,7 +55,7 @@
|
|
55 |
"model.layers.0.mlp.gate_proj": {
|
56 |
"bias": null,
|
57 |
"enable_norm": true,
|
58 |
-
"enable_perm":
|
59 |
"group_num": 1,
|
60 |
"group_size": 5120,
|
61 |
"in_features": 5120,
|
@@ -79,7 +79,7 @@
|
|
79 |
"model.layers.0.mlp.up_proj": {
|
80 |
"bias": null,
|
81 |
"enable_norm": true,
|
82 |
-
"enable_perm":
|
83 |
"group_num": 1,
|
84 |
"group_size": 5120,
|
85 |
"in_features": 5120,
|
@@ -103,7 +103,7 @@
|
|
103 |
"model.layers.0.self_attn.k_proj": {
|
104 |
"bias": true,
|
105 |
"enable_norm": true,
|
106 |
-
"enable_perm":
|
107 |
"group_num": 1,
|
108 |
"group_size": 5120,
|
109 |
"in_features": 5120,
|
@@ -127,7 +127,7 @@
|
|
127 |
"model.layers.0.self_attn.o_proj": {
|
128 |
"bias": null,
|
129 |
"enable_norm": true,
|
130 |
-
"enable_perm":
|
131 |
"group_num": 1,
|
132 |
"group_size": 5120,
|
133 |
"in_features": 5120,
|
@@ -151,7 +151,7 @@
|
|
151 |
"model.layers.0.self_attn.q_proj": {
|
152 |
"bias": true,
|
153 |
"enable_norm": true,
|
154 |
-
"enable_perm":
|
155 |
"group_num": 1,
|
156 |
"group_size": 5120,
|
157 |
"in_features": 5120,
|
@@ -175,7 +175,7 @@
|
|
175 |
"model.layers.0.self_attn.v_proj": {
|
176 |
"bias": true,
|
177 |
"enable_norm": true,
|
178 |
-
"enable_perm":
|
179 |
"group_num": 1,
|
180 |
"group_size": 5120,
|
181 |
"in_features": 5120,
|
@@ -199,7 +199,7 @@
|
|
199 |
"model.layers.1.mlp.down_proj": {
|
200 |
"bias": null,
|
201 |
"enable_norm": true,
|
202 |
-
"enable_perm":
|
203 |
"group_num": 1,
|
204 |
"group_size": 13824,
|
205 |
"in_features": 13824,
|
@@ -223,7 +223,7 @@
|
|
223 |
"model.layers.1.mlp.gate_proj": {
|
224 |
"bias": null,
|
225 |
"enable_norm": true,
|
226 |
-
"enable_perm":
|
227 |
"group_num": 1,
|
228 |
"group_size": 5120,
|
229 |
"in_features": 5120,
|
@@ -247,7 +247,7 @@
|
|
247 |
"model.layers.1.mlp.up_proj": {
|
248 |
"bias": null,
|
249 |
"enable_norm": true,
|
250 |
-
"enable_perm":
|
251 |
"group_num": 1,
|
252 |
"group_size": 5120,
|
253 |
"in_features": 5120,
|
@@ -271,7 +271,7 @@
|
|
271 |
"model.layers.1.self_attn.k_proj": {
|
272 |
"bias": true,
|
273 |
"enable_norm": true,
|
274 |
-
"enable_perm":
|
275 |
"group_num": 1,
|
276 |
"group_size": 5120,
|
277 |
"in_features": 5120,
|
@@ -295,7 +295,7 @@
|
|
295 |
"model.layers.1.self_attn.o_proj": {
|
296 |
"bias": null,
|
297 |
"enable_norm": true,
|
298 |
-
"enable_perm":
|
299 |
"group_num": 1,
|
300 |
"group_size": 5120,
|
301 |
"in_features": 5120,
|
@@ -319,7 +319,7 @@
|
|
319 |
"model.layers.1.self_attn.q_proj": {
|
320 |
"bias": true,
|
321 |
"enable_norm": true,
|
322 |
-
"enable_perm":
|
323 |
"group_num": 1,
|
324 |
"group_size": 5120,
|
325 |
"in_features": 5120,
|
@@ -343,7 +343,7 @@
|
|
343 |
"model.layers.1.self_attn.v_proj": {
|
344 |
"bias": true,
|
345 |
"enable_norm": true,
|
346 |
-
"enable_perm":
|
347 |
"group_num": 1,
|
348 |
"group_size": 5120,
|
349 |
"in_features": 5120,
|
@@ -367,7 +367,7 @@
|
|
367 |
"model.layers.10.mlp.down_proj": {
|
368 |
"bias": null,
|
369 |
"enable_norm": true,
|
370 |
-
"enable_perm":
|
371 |
"group_num": 1,
|
372 |
"group_size": 13824,
|
373 |
"in_features": 13824,
|
@@ -391,7 +391,7 @@
|
|
391 |
"model.layers.10.mlp.gate_proj": {
|
392 |
"bias": null,
|
393 |
"enable_norm": true,
|
394 |
-
"enable_perm":
|
395 |
"group_num": 1,
|
396 |
"group_size": 5120,
|
397 |
"in_features": 5120,
|
@@ -415,7 +415,7 @@
|
|
415 |
"model.layers.10.mlp.up_proj": {
|
416 |
"bias": null,
|
417 |
"enable_norm": true,
|
418 |
-
"enable_perm":
|
419 |
"group_num": 1,
|
420 |
"group_size": 5120,
|
421 |
"in_features": 5120,
|
@@ -439,7 +439,7 @@
|
|
439 |
"model.layers.10.self_attn.k_proj": {
|
440 |
"bias": true,
|
441 |
"enable_norm": true,
|
442 |
-
"enable_perm":
|
443 |
"group_num": 1,
|
444 |
"group_size": 5120,
|
445 |
"in_features": 5120,
|
@@ -463,7 +463,7 @@
|
|
463 |
"model.layers.10.self_attn.o_proj": {
|
464 |
"bias": null,
|
465 |
"enable_norm": true,
|
466 |
-
"enable_perm":
|
467 |
"group_num": 1,
|
468 |
"group_size": 5120,
|
469 |
"in_features": 5120,
|
@@ -487,7 +487,7 @@
|
|
487 |
"model.layers.10.self_attn.q_proj": {
|
488 |
"bias": true,
|
489 |
"enable_norm": true,
|
490 |
-
"enable_perm":
|
491 |
"group_num": 1,
|
492 |
"group_size": 5120,
|
493 |
"in_features": 5120,
|
@@ -511,7 +511,7 @@
|
|
511 |
"model.layers.10.self_attn.v_proj": {
|
512 |
"bias": true,
|
513 |
"enable_norm": true,
|
514 |
-
"enable_perm":
|
515 |
"group_num": 1,
|
516 |
"group_size": 5120,
|
517 |
"in_features": 5120,
|
@@ -535,7 +535,7 @@
|
|
535 |
"model.layers.11.mlp.down_proj": {
|
536 |
"bias": null,
|
537 |
"enable_norm": true,
|
538 |
-
"enable_perm":
|
539 |
"group_num": 1,
|
540 |
"group_size": 13824,
|
541 |
"in_features": 13824,
|
@@ -559,7 +559,7 @@
|
|
559 |
"model.layers.11.mlp.gate_proj": {
|
560 |
"bias": null,
|
561 |
"enable_norm": true,
|
562 |
-
"enable_perm":
|
563 |
"group_num": 1,
|
564 |
"group_size": 5120,
|
565 |
"in_features": 5120,
|
@@ -583,7 +583,7 @@
|
|
583 |
"model.layers.11.mlp.up_proj": {
|
584 |
"bias": null,
|
585 |
"enable_norm": true,
|
586 |
-
"enable_perm":
|
587 |
"group_num": 1,
|
588 |
"group_size": 5120,
|
589 |
"in_features": 5120,
|
@@ -607,7 +607,7 @@
|
|
607 |
"model.layers.11.self_attn.k_proj": {
|
608 |
"bias": true,
|
609 |
"enable_norm": true,
|
610 |
-
"enable_perm":
|
611 |
"group_num": 1,
|
612 |
"group_size": 5120,
|
613 |
"in_features": 5120,
|
@@ -631,7 +631,7 @@
|
|
631 |
"model.layers.11.self_attn.o_proj": {
|
632 |
"bias": null,
|
633 |
"enable_norm": true,
|
634 |
-
"enable_perm":
|
635 |
"group_num": 1,
|
636 |
"group_size": 5120,
|
637 |
"in_features": 5120,
|
@@ -655,7 +655,7 @@
|
|
655 |
"model.layers.11.self_attn.q_proj": {
|
656 |
"bias": true,
|
657 |
"enable_norm": true,
|
658 |
-
"enable_perm":
|
659 |
"group_num": 1,
|
660 |
"group_size": 5120,
|
661 |
"in_features": 5120,
|
@@ -679,7 +679,7 @@
|
|
679 |
"model.layers.11.self_attn.v_proj": {
|
680 |
"bias": true,
|
681 |
"enable_norm": true,
|
682 |
-
"enable_perm":
|
683 |
"group_num": 1,
|
684 |
"group_size": 5120,
|
685 |
"in_features": 5120,
|
@@ -703,7 +703,7 @@
|
|
703 |
"model.layers.12.mlp.down_proj": {
|
704 |
"bias": null,
|
705 |
"enable_norm": true,
|
706 |
-
"enable_perm":
|
707 |
"group_num": 1,
|
708 |
"group_size": 13824,
|
709 |
"in_features": 13824,
|
@@ -727,7 +727,7 @@
|
|
727 |
"model.layers.12.mlp.gate_proj": {
|
728 |
"bias": null,
|
729 |
"enable_norm": true,
|
730 |
-
"enable_perm":
|
731 |
"group_num": 1,
|
732 |
"group_size": 5120,
|
733 |
"in_features": 5120,
|
@@ -751,7 +751,7 @@
|
|
751 |
"model.layers.12.mlp.up_proj": {
|
752 |
"bias": null,
|
753 |
"enable_norm": true,
|
754 |
-
"enable_perm":
|
755 |
"group_num": 1,
|
756 |
"group_size": 5120,
|
757 |
"in_features": 5120,
|
@@ -775,7 +775,7 @@
|
|
775 |
"model.layers.12.self_attn.k_proj": {
|
776 |
"bias": true,
|
777 |
"enable_norm": true,
|
778 |
-
"enable_perm":
|
779 |
"group_num": 1,
|
780 |
"group_size": 5120,
|
781 |
"in_features": 5120,
|
@@ -799,7 +799,7 @@
|
|
799 |
"model.layers.12.self_attn.o_proj": {
|
800 |
"bias": null,
|
801 |
"enable_norm": true,
|
802 |
-
"enable_perm":
|
803 |
"group_num": 1,
|
804 |
"group_size": 5120,
|
805 |
"in_features": 5120,
|
@@ -823,7 +823,7 @@
|
|
823 |
"model.layers.12.self_attn.q_proj": {
|
824 |
"bias": true,
|
825 |
"enable_norm": true,
|
826 |
-
"enable_perm":
|
827 |
"group_num": 1,
|
828 |
"group_size": 5120,
|
829 |
"in_features": 5120,
|
@@ -847,7 +847,7 @@
|
|
847 |
"model.layers.12.self_attn.v_proj": {
|
848 |
"bias": true,
|
849 |
"enable_norm": true,
|
850 |
-
"enable_perm":
|
851 |
"group_num": 1,
|
852 |
"group_size": 5120,
|
853 |
"in_features": 5120,
|
@@ -871,7 +871,7 @@
|
|
871 |
"model.layers.13.mlp.down_proj": {
|
872 |
"bias": null,
|
873 |
"enable_norm": true,
|
874 |
-
"enable_perm":
|
875 |
"group_num": 1,
|
876 |
"group_size": 13824,
|
877 |
"in_features": 13824,
|
@@ -895,7 +895,7 @@
|
|
895 |
"model.layers.13.mlp.gate_proj": {
|
896 |
"bias": null,
|
897 |
"enable_norm": true,
|
898 |
-
"enable_perm":
|
899 |
"group_num": 1,
|
900 |
"group_size": 5120,
|
901 |
"in_features": 5120,
|
@@ -919,7 +919,7 @@
|
|
919 |
"model.layers.13.mlp.up_proj": {
|
920 |
"bias": null,
|
921 |
"enable_norm": true,
|
922 |
-
"enable_perm":
|
923 |
"group_num": 1,
|
924 |
"group_size": 5120,
|
925 |
"in_features": 5120,
|
@@ -943,7 +943,7 @@
|
|
943 |
"model.layers.13.self_attn.k_proj": {
|
944 |
"bias": true,
|
945 |
"enable_norm": true,
|
946 |
-
"enable_perm":
|
947 |
"group_num": 1,
|
948 |
"group_size": 5120,
|
949 |
"in_features": 5120,
|
@@ -967,7 +967,7 @@
|
|
967 |
"model.layers.13.self_attn.o_proj": {
|
968 |
"bias": null,
|
969 |
"enable_norm": true,
|
970 |
-
"enable_perm":
|
971 |
"group_num": 1,
|
972 |
"group_size": 5120,
|
973 |
"in_features": 5120,
|
@@ -991,7 +991,7 @@
|
|
991 |
"model.layers.13.self_attn.q_proj": {
|
992 |
"bias": true,
|
993 |
"enable_norm": true,
|
994 |
-
"enable_perm":
|
995 |
"group_num": 1,
|
996 |
"group_size": 5120,
|
997 |
"in_features": 5120,
|
@@ -1015,7 +1015,7 @@
|
|
1015 |
"model.layers.13.self_attn.v_proj": {
|
1016 |
"bias": true,
|
1017 |
"enable_norm": true,
|
1018 |
-
"enable_perm":
|
1019 |
"group_num": 1,
|
1020 |
"group_size": 5120,
|
1021 |
"in_features": 5120,
|
@@ -1039,7 +1039,7 @@
|
|
1039 |
"model.layers.14.mlp.down_proj": {
|
1040 |
"bias": null,
|
1041 |
"enable_norm": true,
|
1042 |
-
"enable_perm":
|
1043 |
"group_num": 1,
|
1044 |
"group_size": 13824,
|
1045 |
"in_features": 13824,
|
@@ -1063,7 +1063,7 @@
|
|
1063 |
"model.layers.14.mlp.gate_proj": {
|
1064 |
"bias": null,
|
1065 |
"enable_norm": true,
|
1066 |
-
"enable_perm":
|
1067 |
"group_num": 1,
|
1068 |
"group_size": 5120,
|
1069 |
"in_features": 5120,
|
@@ -1087,7 +1087,7 @@
|
|
1087 |
"model.layers.14.mlp.up_proj": {
|
1088 |
"bias": null,
|
1089 |
"enable_norm": true,
|
1090 |
-
"enable_perm":
|
1091 |
"group_num": 1,
|
1092 |
"group_size": 5120,
|
1093 |
"in_features": 5120,
|
@@ -1111,7 +1111,7 @@
|
|
1111 |
"model.layers.14.self_attn.k_proj": {
|
1112 |
"bias": true,
|
1113 |
"enable_norm": true,
|
1114 |
-
"enable_perm":
|
1115 |
"group_num": 1,
|
1116 |
"group_size": 5120,
|
1117 |
"in_features": 5120,
|
@@ -1135,7 +1135,7 @@
|
|
1135 |
"model.layers.14.self_attn.o_proj": {
|
1136 |
"bias": null,
|
1137 |
"enable_norm": true,
|
1138 |
-
"enable_perm":
|
1139 |
"group_num": 1,
|
1140 |
"group_size": 5120,
|
1141 |
"in_features": 5120,
|
@@ -1159,7 +1159,7 @@
|
|
1159 |
"model.layers.14.self_attn.q_proj": {
|
1160 |
"bias": true,
|
1161 |
"enable_norm": true,
|
1162 |
-
"enable_perm":
|
1163 |
"group_num": 1,
|
1164 |
"group_size": 5120,
|
1165 |
"in_features": 5120,
|
@@ -1183,7 +1183,7 @@
|
|
1183 |
"model.layers.14.self_attn.v_proj": {
|
1184 |
"bias": true,
|
1185 |
"enable_norm": true,
|
1186 |
-
"enable_perm":
|
1187 |
"group_num": 1,
|
1188 |
"group_size": 5120,
|
1189 |
"in_features": 5120,
|
@@ -1207,7 +1207,7 @@
|
|
1207 |
"model.layers.15.mlp.down_proj": {
|
1208 |
"bias": null,
|
1209 |
"enable_norm": true,
|
1210 |
-
"enable_perm":
|
1211 |
"group_num": 1,
|
1212 |
"group_size": 13824,
|
1213 |
"in_features": 13824,
|
@@ -1231,7 +1231,7 @@
|
|
1231 |
"model.layers.15.mlp.gate_proj": {
|
1232 |
"bias": null,
|
1233 |
"enable_norm": true,
|
1234 |
-
"enable_perm":
|
1235 |
"group_num": 1,
|
1236 |
"group_size": 5120,
|
1237 |
"in_features": 5120,
|
@@ -1255,7 +1255,7 @@
|
|
1255 |
"model.layers.15.mlp.up_proj": {
|
1256 |
"bias": null,
|
1257 |
"enable_norm": true,
|
1258 |
-
"enable_perm":
|
1259 |
"group_num": 1,
|
1260 |
"group_size": 5120,
|
1261 |
"in_features": 5120,
|
@@ -1279,7 +1279,7 @@
|
|
1279 |
"model.layers.15.self_attn.k_proj": {
|
1280 |
"bias": true,
|
1281 |
"enable_norm": true,
|
1282 |
-
"enable_perm":
|
1283 |
"group_num": 1,
|
1284 |
"group_size": 5120,
|
1285 |
"in_features": 5120,
|
@@ -1303,7 +1303,7 @@
|
|
1303 |
"model.layers.15.self_attn.o_proj": {
|
1304 |
"bias": null,
|
1305 |
"enable_norm": true,
|
1306 |
-
"enable_perm":
|
1307 |
"group_num": 1,
|
1308 |
"group_size": 5120,
|
1309 |
"in_features": 5120,
|
@@ -1327,7 +1327,7 @@
|
|
1327 |
"model.layers.15.self_attn.q_proj": {
|
1328 |
"bias": true,
|
1329 |
"enable_norm": true,
|
1330 |
-
"enable_perm":
|
1331 |
"group_num": 1,
|
1332 |
"group_size": 5120,
|
1333 |
"in_features": 5120,
|
@@ -1351,7 +1351,7 @@
|
|
1351 |
"model.layers.15.self_attn.v_proj": {
|
1352 |
"bias": true,
|
1353 |
"enable_norm": true,
|
1354 |
-
"enable_perm":
|
1355 |
"group_num": 1,
|
1356 |
"group_size": 5120,
|
1357 |
"in_features": 5120,
|
@@ -1375,7 +1375,7 @@
|
|
1375 |
"model.layers.16.mlp.down_proj": {
|
1376 |
"bias": null,
|
1377 |
"enable_norm": true,
|
1378 |
-
"enable_perm":
|
1379 |
"group_num": 1,
|
1380 |
"group_size": 13824,
|
1381 |
"in_features": 13824,
|
@@ -1399,7 +1399,7 @@
|
|
1399 |
"model.layers.16.mlp.gate_proj": {
|
1400 |
"bias": null,
|
1401 |
"enable_norm": true,
|
1402 |
-
"enable_perm":
|
1403 |
"group_num": 1,
|
1404 |
"group_size": 5120,
|
1405 |
"in_features": 5120,
|
@@ -1423,7 +1423,7 @@
|
|
1423 |
"model.layers.16.mlp.up_proj": {
|
1424 |
"bias": null,
|
1425 |
"enable_norm": true,
|
1426 |
-
"enable_perm":
|
1427 |
"group_num": 1,
|
1428 |
"group_size": 5120,
|
1429 |
"in_features": 5120,
|
@@ -1447,7 +1447,7 @@
|
|
1447 |
"model.layers.16.self_attn.k_proj": {
|
1448 |
"bias": true,
|
1449 |
"enable_norm": true,
|
1450 |
-
"enable_perm":
|
1451 |
"group_num": 1,
|
1452 |
"group_size": 5120,
|
1453 |
"in_features": 5120,
|
@@ -1471,7 +1471,7 @@
|
|
1471 |
"model.layers.16.self_attn.o_proj": {
|
1472 |
"bias": null,
|
1473 |
"enable_norm": true,
|
1474 |
-
"enable_perm":
|
1475 |
"group_num": 1,
|
1476 |
"group_size": 5120,
|
1477 |
"in_features": 5120,
|
@@ -1495,7 +1495,7 @@
|
|
1495 |
"model.layers.16.self_attn.q_proj": {
|
1496 |
"bias": true,
|
1497 |
"enable_norm": true,
|
1498 |
-
"enable_perm":
|
1499 |
"group_num": 1,
|
1500 |
"group_size": 5120,
|
1501 |
"in_features": 5120,
|
@@ -1519,7 +1519,7 @@
|
|
1519 |
"model.layers.16.self_attn.v_proj": {
|
1520 |
"bias": true,
|
1521 |
"enable_norm": true,
|
1522 |
-
"enable_perm":
|
1523 |
"group_num": 1,
|
1524 |
"group_size": 5120,
|
1525 |
"in_features": 5120,
|
@@ -1543,7 +1543,7 @@
|
|
1543 |
"model.layers.17.mlp.down_proj": {
|
1544 |
"bias": null,
|
1545 |
"enable_norm": true,
|
1546 |
-
"enable_perm":
|
1547 |
"group_num": 1,
|
1548 |
"group_size": 13824,
|
1549 |
"in_features": 13824,
|
@@ -1567,7 +1567,7 @@
|
|
1567 |
"model.layers.17.mlp.gate_proj": {
|
1568 |
"bias": null,
|
1569 |
"enable_norm": true,
|
1570 |
-
"enable_perm":
|
1571 |
"group_num": 1,
|
1572 |
"group_size": 5120,
|
1573 |
"in_features": 5120,
|
@@ -1591,7 +1591,7 @@
|
|
1591 |
"model.layers.17.mlp.up_proj": {
|
1592 |
"bias": null,
|
1593 |
"enable_norm": true,
|
1594 |
-
"enable_perm":
|
1595 |
"group_num": 1,
|
1596 |
"group_size": 5120,
|
1597 |
"in_features": 5120,
|
@@ -1615,7 +1615,7 @@
|
|
1615 |
"model.layers.17.self_attn.k_proj": {
|
1616 |
"bias": true,
|
1617 |
"enable_norm": true,
|
1618 |
-
"enable_perm":
|
1619 |
"group_num": 1,
|
1620 |
"group_size": 5120,
|
1621 |
"in_features": 5120,
|
@@ -1639,7 +1639,7 @@
|
|
1639 |
"model.layers.17.self_attn.o_proj": {
|
1640 |
"bias": null,
|
1641 |
"enable_norm": true,
|
1642 |
-
"enable_perm":
|
1643 |
"group_num": 1,
|
1644 |
"group_size": 5120,
|
1645 |
"in_features": 5120,
|
@@ -1663,7 +1663,7 @@
|
|
1663 |
"model.layers.17.self_attn.q_proj": {
|
1664 |
"bias": true,
|
1665 |
"enable_norm": true,
|
1666 |
-
"enable_perm":
|
1667 |
"group_num": 1,
|
1668 |
"group_size": 5120,
|
1669 |
"in_features": 5120,
|
@@ -1687,7 +1687,7 @@
|
|
1687 |
"model.layers.17.self_attn.v_proj": {
|
1688 |
"bias": true,
|
1689 |
"enable_norm": true,
|
1690 |
-
"enable_perm":
|
1691 |
"group_num": 1,
|
1692 |
"group_size": 5120,
|
1693 |
"in_features": 5120,
|
@@ -1711,7 +1711,7 @@
|
|
1711 |
"model.layers.18.mlp.down_proj": {
|
1712 |
"bias": null,
|
1713 |
"enable_norm": true,
|
1714 |
-
"enable_perm":
|
1715 |
"group_num": 1,
|
1716 |
"group_size": 13824,
|
1717 |
"in_features": 13824,
|
@@ -1735,7 +1735,7 @@
|
|
1735 |
"model.layers.18.mlp.gate_proj": {
|
1736 |
"bias": null,
|
1737 |
"enable_norm": true,
|
1738 |
-
"enable_perm":
|
1739 |
"group_num": 1,
|
1740 |
"group_size": 5120,
|
1741 |
"in_features": 5120,
|
@@ -1759,7 +1759,7 @@
|
|
1759 |
"model.layers.18.mlp.up_proj": {
|
1760 |
"bias": null,
|
1761 |
"enable_norm": true,
|
1762 |
-
"enable_perm":
|
1763 |
"group_num": 1,
|
1764 |
"group_size": 5120,
|
1765 |
"in_features": 5120,
|
@@ -1783,7 +1783,7 @@
|
|
1783 |
"model.layers.18.self_attn.k_proj": {
|
1784 |
"bias": true,
|
1785 |
"enable_norm": true,
|
1786 |
-
"enable_perm":
|
1787 |
"group_num": 1,
|
1788 |
"group_size": 5120,
|
1789 |
"in_features": 5120,
|
@@ -1807,7 +1807,7 @@
|
|
1807 |
"model.layers.18.self_attn.o_proj": {
|
1808 |
"bias": null,
|
1809 |
"enable_norm": true,
|
1810 |
-
"enable_perm":
|
1811 |
"group_num": 1,
|
1812 |
"group_size": 5120,
|
1813 |
"in_features": 5120,
|
@@ -1831,7 +1831,7 @@
|
|
1831 |
"model.layers.18.self_attn.q_proj": {
|
1832 |
"bias": true,
|
1833 |
"enable_norm": true,
|
1834 |
-
"enable_perm":
|
1835 |
"group_num": 1,
|
1836 |
"group_size": 5120,
|
1837 |
"in_features": 5120,
|
@@ -1855,7 +1855,7 @@
|
|
1855 |
"model.layers.18.self_attn.v_proj": {
|
1856 |
"bias": true,
|
1857 |
"enable_norm": true,
|
1858 |
-
"enable_perm":
|
1859 |
"group_num": 1,
|
1860 |
"group_size": 5120,
|
1861 |
"in_features": 5120,
|
@@ -1879,7 +1879,7 @@
|
|
1879 |
"model.layers.19.mlp.down_proj": {
|
1880 |
"bias": null,
|
1881 |
"enable_norm": true,
|
1882 |
-
"enable_perm":
|
1883 |
"group_num": 1,
|
1884 |
"group_size": 13824,
|
1885 |
"in_features": 13824,
|
@@ -1903,7 +1903,7 @@
|
|
1903 |
"model.layers.19.mlp.gate_proj": {
|
1904 |
"bias": null,
|
1905 |
"enable_norm": true,
|
1906 |
-
"enable_perm":
|
1907 |
"group_num": 1,
|
1908 |
"group_size": 5120,
|
1909 |
"in_features": 5120,
|
@@ -1927,7 +1927,7 @@
|
|
1927 |
"model.layers.19.mlp.up_proj": {
|
1928 |
"bias": null,
|
1929 |
"enable_norm": true,
|
1930 |
-
"enable_perm":
|
1931 |
"group_num": 1,
|
1932 |
"group_size": 5120,
|
1933 |
"in_features": 5120,
|
@@ -1951,7 +1951,7 @@
|
|
1951 |
"model.layers.19.self_attn.k_proj": {
|
1952 |
"bias": true,
|
1953 |
"enable_norm": true,
|
1954 |
-
"enable_perm":
|
1955 |
"group_num": 1,
|
1956 |
"group_size": 5120,
|
1957 |
"in_features": 5120,
|
@@ -1975,7 +1975,7 @@
|
|
1975 |
"model.layers.19.self_attn.o_proj": {
|
1976 |
"bias": null,
|
1977 |
"enable_norm": true,
|
1978 |
-
"enable_perm":
|
1979 |
"group_num": 1,
|
1980 |
"group_size": 5120,
|
1981 |
"in_features": 5120,
|
@@ -1999,7 +1999,7 @@
|
|
1999 |
"model.layers.19.self_attn.q_proj": {
|
2000 |
"bias": true,
|
2001 |
"enable_norm": true,
|
2002 |
-
"enable_perm":
|
2003 |
"group_num": 1,
|
2004 |
"group_size": 5120,
|
2005 |
"in_features": 5120,
|
@@ -2023,7 +2023,7 @@
|
|
2023 |
"model.layers.19.self_attn.v_proj": {
|
2024 |
"bias": true,
|
2025 |
"enable_norm": true,
|
2026 |
-
"enable_perm":
|
2027 |
"group_num": 1,
|
2028 |
"group_size": 5120,
|
2029 |
"in_features": 5120,
|
@@ -2047,7 +2047,7 @@
|
|
2047 |
"model.layers.2.mlp.down_proj": {
|
2048 |
"bias": null,
|
2049 |
"enable_norm": true,
|
2050 |
-
"enable_perm":
|
2051 |
"group_num": 1,
|
2052 |
"group_size": 13824,
|
2053 |
"in_features": 13824,
|
@@ -2071,7 +2071,7 @@
|
|
2071 |
"model.layers.2.mlp.gate_proj": {
|
2072 |
"bias": null,
|
2073 |
"enable_norm": true,
|
2074 |
-
"enable_perm":
|
2075 |
"group_num": 1,
|
2076 |
"group_size": 5120,
|
2077 |
"in_features": 5120,
|
@@ -2095,7 +2095,7 @@
|
|
2095 |
"model.layers.2.mlp.up_proj": {
|
2096 |
"bias": null,
|
2097 |
"enable_norm": true,
|
2098 |
-
"enable_perm":
|
2099 |
"group_num": 1,
|
2100 |
"group_size": 5120,
|
2101 |
"in_features": 5120,
|
@@ -2119,7 +2119,7 @@
|
|
2119 |
"model.layers.2.self_attn.k_proj": {
|
2120 |
"bias": true,
|
2121 |
"enable_norm": true,
|
2122 |
-
"enable_perm":
|
2123 |
"group_num": 1,
|
2124 |
"group_size": 5120,
|
2125 |
"in_features": 5120,
|
@@ -2143,7 +2143,7 @@
|
|
2143 |
"model.layers.2.self_attn.o_proj": {
|
2144 |
"bias": null,
|
2145 |
"enable_norm": true,
|
2146 |
-
"enable_perm":
|
2147 |
"group_num": 1,
|
2148 |
"group_size": 5120,
|
2149 |
"in_features": 5120,
|
@@ -2167,7 +2167,7 @@
|
|
2167 |
"model.layers.2.self_attn.q_proj": {
|
2168 |
"bias": true,
|
2169 |
"enable_norm": true,
|
2170 |
-
"enable_perm":
|
2171 |
"group_num": 1,
|
2172 |
"group_size": 5120,
|
2173 |
"in_features": 5120,
|
@@ -2191,7 +2191,7 @@
|
|
2191 |
"model.layers.2.self_attn.v_proj": {
|
2192 |
"bias": true,
|
2193 |
"enable_norm": true,
|
2194 |
-
"enable_perm":
|
2195 |
"group_num": 1,
|
2196 |
"group_size": 5120,
|
2197 |
"in_features": 5120,
|
@@ -2215,7 +2215,7 @@
|
|
2215 |
"model.layers.20.mlp.down_proj": {
|
2216 |
"bias": null,
|
2217 |
"enable_norm": true,
|
2218 |
-
"enable_perm":
|
2219 |
"group_num": 1,
|
2220 |
"group_size": 13824,
|
2221 |
"in_features": 13824,
|
@@ -2239,7 +2239,7 @@
|
|
2239 |
"model.layers.20.mlp.gate_proj": {
|
2240 |
"bias": null,
|
2241 |
"enable_norm": true,
|
2242 |
-
"enable_perm":
|
2243 |
"group_num": 1,
|
2244 |
"group_size": 5120,
|
2245 |
"in_features": 5120,
|
@@ -2263,7 +2263,7 @@
|
|
2263 |
"model.layers.20.mlp.up_proj": {
|
2264 |
"bias": null,
|
2265 |
"enable_norm": true,
|
2266 |
-
"enable_perm":
|
2267 |
"group_num": 1,
|
2268 |
"group_size": 5120,
|
2269 |
"in_features": 5120,
|
@@ -2287,7 +2287,7 @@
|
|
2287 |
"model.layers.20.self_attn.k_proj": {
|
2288 |
"bias": true,
|
2289 |
"enable_norm": true,
|
2290 |
-
"enable_perm":
|
2291 |
"group_num": 1,
|
2292 |
"group_size": 5120,
|
2293 |
"in_features": 5120,
|
@@ -2311,7 +2311,7 @@
|
|
2311 |
"model.layers.20.self_attn.o_proj": {
|
2312 |
"bias": null,
|
2313 |
"enable_norm": true,
|
2314 |
-
"enable_perm":
|
2315 |
"group_num": 1,
|
2316 |
"group_size": 5120,
|
2317 |
"in_features": 5120,
|
@@ -2335,7 +2335,7 @@
|
|
2335 |
"model.layers.20.self_attn.q_proj": {
|
2336 |
"bias": true,
|
2337 |
"enable_norm": true,
|
2338 |
-
"enable_perm":
|
2339 |
"group_num": 1,
|
2340 |
"group_size": 5120,
|
2341 |
"in_features": 5120,
|
@@ -2359,7 +2359,7 @@
|
|
2359 |
"model.layers.20.self_attn.v_proj": {
|
2360 |
"bias": true,
|
2361 |
"enable_norm": true,
|
2362 |
-
"enable_perm":
|
2363 |
"group_num": 1,
|
2364 |
"group_size": 5120,
|
2365 |
"in_features": 5120,
|
@@ -2383,7 +2383,7 @@
|
|
2383 |
"model.layers.21.mlp.down_proj": {
|
2384 |
"bias": null,
|
2385 |
"enable_norm": true,
|
2386 |
-
"enable_perm":
|
2387 |
"group_num": 1,
|
2388 |
"group_size": 13824,
|
2389 |
"in_features": 13824,
|
@@ -2407,7 +2407,7 @@
|
|
2407 |
"model.layers.21.mlp.gate_proj": {
|
2408 |
"bias": null,
|
2409 |
"enable_norm": true,
|
2410 |
-
"enable_perm":
|
2411 |
"group_num": 1,
|
2412 |
"group_size": 5120,
|
2413 |
"in_features": 5120,
|
@@ -2431,7 +2431,7 @@
|
|
2431 |
"model.layers.21.mlp.up_proj": {
|
2432 |
"bias": null,
|
2433 |
"enable_norm": true,
|
2434 |
-
"enable_perm":
|
2435 |
"group_num": 1,
|
2436 |
"group_size": 5120,
|
2437 |
"in_features": 5120,
|
@@ -2455,7 +2455,7 @@
|
|
2455 |
"model.layers.21.self_attn.k_proj": {
|
2456 |
"bias": true,
|
2457 |
"enable_norm": true,
|
2458 |
-
"enable_perm":
|
2459 |
"group_num": 1,
|
2460 |
"group_size": 5120,
|
2461 |
"in_features": 5120,
|
@@ -2479,7 +2479,7 @@
|
|
2479 |
"model.layers.21.self_attn.o_proj": {
|
2480 |
"bias": null,
|
2481 |
"enable_norm": true,
|
2482 |
-
"enable_perm":
|
2483 |
"group_num": 1,
|
2484 |
"group_size": 5120,
|
2485 |
"in_features": 5120,
|
@@ -2503,7 +2503,7 @@
|
|
2503 |
"model.layers.21.self_attn.q_proj": {
|
2504 |
"bias": true,
|
2505 |
"enable_norm": true,
|
2506 |
-
"enable_perm":
|
2507 |
"group_num": 1,
|
2508 |
"group_size": 5120,
|
2509 |
"in_features": 5120,
|
@@ -2527,7 +2527,7 @@
|
|
2527 |
"model.layers.21.self_attn.v_proj": {
|
2528 |
"bias": true,
|
2529 |
"enable_norm": true,
|
2530 |
-
"enable_perm":
|
2531 |
"group_num": 1,
|
2532 |
"group_size": 5120,
|
2533 |
"in_features": 5120,
|
@@ -2551,7 +2551,7 @@
|
|
2551 |
"model.layers.22.mlp.down_proj": {
|
2552 |
"bias": null,
|
2553 |
"enable_norm": true,
|
2554 |
-
"enable_perm":
|
2555 |
"group_num": 1,
|
2556 |
"group_size": 13824,
|
2557 |
"in_features": 13824,
|
@@ -2575,7 +2575,7 @@
|
|
2575 |
"model.layers.22.mlp.gate_proj": {
|
2576 |
"bias": null,
|
2577 |
"enable_norm": true,
|
2578 |
-
"enable_perm":
|
2579 |
"group_num": 1,
|
2580 |
"group_size": 5120,
|
2581 |
"in_features": 5120,
|
@@ -2599,7 +2599,7 @@
|
|
2599 |
"model.layers.22.mlp.up_proj": {
|
2600 |
"bias": null,
|
2601 |
"enable_norm": true,
|
2602 |
-
"enable_perm":
|
2603 |
"group_num": 1,
|
2604 |
"group_size": 5120,
|
2605 |
"in_features": 5120,
|
@@ -2623,7 +2623,7 @@
|
|
2623 |
"model.layers.22.self_attn.k_proj": {
|
2624 |
"bias": true,
|
2625 |
"enable_norm": true,
|
2626 |
-
"enable_perm":
|
2627 |
"group_num": 1,
|
2628 |
"group_size": 5120,
|
2629 |
"in_features": 5120,
|
@@ -2647,7 +2647,7 @@
|
|
2647 |
"model.layers.22.self_attn.o_proj": {
|
2648 |
"bias": null,
|
2649 |
"enable_norm": true,
|
2650 |
-
"enable_perm":
|
2651 |
"group_num": 1,
|
2652 |
"group_size": 5120,
|
2653 |
"in_features": 5120,
|
@@ -2671,7 +2671,7 @@
|
|
2671 |
"model.layers.22.self_attn.q_proj": {
|
2672 |
"bias": true,
|
2673 |
"enable_norm": true,
|
2674 |
-
"enable_perm":
|
2675 |
"group_num": 1,
|
2676 |
"group_size": 5120,
|
2677 |
"in_features": 5120,
|
@@ -2695,7 +2695,7 @@
|
|
2695 |
"model.layers.22.self_attn.v_proj": {
|
2696 |
"bias": true,
|
2697 |
"enable_norm": true,
|
2698 |
-
"enable_perm":
|
2699 |
"group_num": 1,
|
2700 |
"group_size": 5120,
|
2701 |
"in_features": 5120,
|
@@ -2719,7 +2719,7 @@
|
|
2719 |
"model.layers.23.mlp.down_proj": {
|
2720 |
"bias": null,
|
2721 |
"enable_norm": true,
|
2722 |
-
"enable_perm":
|
2723 |
"group_num": 1,
|
2724 |
"group_size": 13824,
|
2725 |
"in_features": 13824,
|
@@ -2743,7 +2743,7 @@
|
|
2743 |
"model.layers.23.mlp.gate_proj": {
|
2744 |
"bias": null,
|
2745 |
"enable_norm": true,
|
2746 |
-
"enable_perm":
|
2747 |
"group_num": 1,
|
2748 |
"group_size": 5120,
|
2749 |
"in_features": 5120,
|
@@ -2767,7 +2767,7 @@
|
|
2767 |
"model.layers.23.mlp.up_proj": {
|
2768 |
"bias": null,
|
2769 |
"enable_norm": true,
|
2770 |
-
"enable_perm":
|
2771 |
"group_num": 1,
|
2772 |
"group_size": 5120,
|
2773 |
"in_features": 5120,
|
@@ -2791,7 +2791,7 @@
|
|
2791 |
"model.layers.23.self_attn.k_proj": {
|
2792 |
"bias": true,
|
2793 |
"enable_norm": true,
|
2794 |
-
"enable_perm":
|
2795 |
"group_num": 1,
|
2796 |
"group_size": 5120,
|
2797 |
"in_features": 5120,
|
@@ -2815,7 +2815,7 @@
|
|
2815 |
"model.layers.23.self_attn.o_proj": {
|
2816 |
"bias": null,
|
2817 |
"enable_norm": true,
|
2818 |
-
"enable_perm":
|
2819 |
"group_num": 1,
|
2820 |
"group_size": 5120,
|
2821 |
"in_features": 5120,
|
@@ -2839,7 +2839,7 @@
|
|
2839 |
"model.layers.23.self_attn.q_proj": {
|
2840 |
"bias": true,
|
2841 |
"enable_norm": true,
|
2842 |
-
"enable_perm":
|
2843 |
"group_num": 1,
|
2844 |
"group_size": 5120,
|
2845 |
"in_features": 5120,
|
@@ -2863,7 +2863,7 @@
|
|
2863 |
"model.layers.23.self_attn.v_proj": {
|
2864 |
"bias": true,
|
2865 |
"enable_norm": true,
|
2866 |
-
"enable_perm":
|
2867 |
"group_num": 1,
|
2868 |
"group_size": 5120,
|
2869 |
"in_features": 5120,
|
@@ -2887,7 +2887,7 @@
|
|
2887 |
"model.layers.24.mlp.down_proj": {
|
2888 |
"bias": null,
|
2889 |
"enable_norm": true,
|
2890 |
-
"enable_perm":
|
2891 |
"group_num": 1,
|
2892 |
"group_size": 13824,
|
2893 |
"in_features": 13824,
|
@@ -2911,7 +2911,7 @@
|
|
2911 |
"model.layers.24.mlp.gate_proj": {
|
2912 |
"bias": null,
|
2913 |
"enable_norm": true,
|
2914 |
-
"enable_perm":
|
2915 |
"group_num": 1,
|
2916 |
"group_size": 5120,
|
2917 |
"in_features": 5120,
|
@@ -2935,7 +2935,7 @@
|
|
2935 |
"model.layers.24.mlp.up_proj": {
|
2936 |
"bias": null,
|
2937 |
"enable_norm": true,
|
2938 |
-
"enable_perm":
|
2939 |
"group_num": 1,
|
2940 |
"group_size": 5120,
|
2941 |
"in_features": 5120,
|
@@ -2959,7 +2959,7 @@
|
|
2959 |
"model.layers.24.self_attn.k_proj": {
|
2960 |
"bias": true,
|
2961 |
"enable_norm": true,
|
2962 |
-
"enable_perm":
|
2963 |
"group_num": 1,
|
2964 |
"group_size": 5120,
|
2965 |
"in_features": 5120,
|
@@ -2983,7 +2983,7 @@
|
|
2983 |
"model.layers.24.self_attn.o_proj": {
|
2984 |
"bias": null,
|
2985 |
"enable_norm": true,
|
2986 |
-
"enable_perm":
|
2987 |
"group_num": 1,
|
2988 |
"group_size": 5120,
|
2989 |
"in_features": 5120,
|
@@ -3007,7 +3007,7 @@
|
|
3007 |
"model.layers.24.self_attn.q_proj": {
|
3008 |
"bias": true,
|
3009 |
"enable_norm": true,
|
3010 |
-
"enable_perm":
|
3011 |
"group_num": 1,
|
3012 |
"group_size": 5120,
|
3013 |
"in_features": 5120,
|
@@ -3031,7 +3031,7 @@
|
|
3031 |
"model.layers.24.self_attn.v_proj": {
|
3032 |
"bias": true,
|
3033 |
"enable_norm": true,
|
3034 |
-
"enable_perm":
|
3035 |
"group_num": 1,
|
3036 |
"group_size": 5120,
|
3037 |
"in_features": 5120,
|
@@ -3055,7 +3055,7 @@
|
|
3055 |
"model.layers.25.mlp.down_proj": {
|
3056 |
"bias": null,
|
3057 |
"enable_norm": true,
|
3058 |
-
"enable_perm":
|
3059 |
"group_num": 1,
|
3060 |
"group_size": 13824,
|
3061 |
"in_features": 13824,
|
@@ -3079,7 +3079,7 @@
|
|
3079 |
"model.layers.25.mlp.gate_proj": {
|
3080 |
"bias": null,
|
3081 |
"enable_norm": true,
|
3082 |
-
"enable_perm":
|
3083 |
"group_num": 1,
|
3084 |
"group_size": 5120,
|
3085 |
"in_features": 5120,
|
@@ -3103,7 +3103,7 @@
|
|
3103 |
"model.layers.25.mlp.up_proj": {
|
3104 |
"bias": null,
|
3105 |
"enable_norm": true,
|
3106 |
-
"enable_perm":
|
3107 |
"group_num": 1,
|
3108 |
"group_size": 5120,
|
3109 |
"in_features": 5120,
|
@@ -3127,7 +3127,7 @@
|
|
3127 |
"model.layers.25.self_attn.k_proj": {
|
3128 |
"bias": true,
|
3129 |
"enable_norm": true,
|
3130 |
-
"enable_perm":
|
3131 |
"group_num": 1,
|
3132 |
"group_size": 5120,
|
3133 |
"in_features": 5120,
|
@@ -3151,7 +3151,7 @@
|
|
3151 |
"model.layers.25.self_attn.o_proj": {
|
3152 |
"bias": null,
|
3153 |
"enable_norm": true,
|
3154 |
-
"enable_perm":
|
3155 |
"group_num": 1,
|
3156 |
"group_size": 5120,
|
3157 |
"in_features": 5120,
|
@@ -3175,7 +3175,7 @@
|
|
3175 |
"model.layers.25.self_attn.q_proj": {
|
3176 |
"bias": true,
|
3177 |
"enable_norm": true,
|
3178 |
-
"enable_perm":
|
3179 |
"group_num": 1,
|
3180 |
"group_size": 5120,
|
3181 |
"in_features": 5120,
|
@@ -3199,7 +3199,7 @@
|
|
3199 |
"model.layers.25.self_attn.v_proj": {
|
3200 |
"bias": true,
|
3201 |
"enable_norm": true,
|
3202 |
-
"enable_perm":
|
3203 |
"group_num": 1,
|
3204 |
"group_size": 5120,
|
3205 |
"in_features": 5120,
|
@@ -3223,7 +3223,7 @@
|
|
3223 |
"model.layers.26.mlp.down_proj": {
|
3224 |
"bias": null,
|
3225 |
"enable_norm": true,
|
3226 |
-
"enable_perm":
|
3227 |
"group_num": 1,
|
3228 |
"group_size": 13824,
|
3229 |
"in_features": 13824,
|
@@ -3247,7 +3247,7 @@
|
|
3247 |
"model.layers.26.mlp.gate_proj": {
|
3248 |
"bias": null,
|
3249 |
"enable_norm": true,
|
3250 |
-
"enable_perm":
|
3251 |
"group_num": 1,
|
3252 |
"group_size": 5120,
|
3253 |
"in_features": 5120,
|
@@ -3271,7 +3271,7 @@
|
|
3271 |
"model.layers.26.mlp.up_proj": {
|
3272 |
"bias": null,
|
3273 |
"enable_norm": true,
|
3274 |
-
"enable_perm":
|
3275 |
"group_num": 1,
|
3276 |
"group_size": 5120,
|
3277 |
"in_features": 5120,
|
@@ -3295,7 +3295,7 @@
|
|
3295 |
"model.layers.26.self_attn.k_proj": {
|
3296 |
"bias": true,
|
3297 |
"enable_norm": true,
|
3298 |
-
"enable_perm":
|
3299 |
"group_num": 1,
|
3300 |
"group_size": 5120,
|
3301 |
"in_features": 5120,
|
@@ -3319,7 +3319,7 @@
|
|
3319 |
"model.layers.26.self_attn.o_proj": {
|
3320 |
"bias": null,
|
3321 |
"enable_norm": true,
|
3322 |
-
"enable_perm":
|
3323 |
"group_num": 1,
|
3324 |
"group_size": 5120,
|
3325 |
"in_features": 5120,
|
@@ -3343,7 +3343,7 @@
|
|
3343 |
"model.layers.26.self_attn.q_proj": {
|
3344 |
"bias": true,
|
3345 |
"enable_norm": true,
|
3346 |
-
"enable_perm":
|
3347 |
"group_num": 1,
|
3348 |
"group_size": 5120,
|
3349 |
"in_features": 5120,
|
@@ -3367,7 +3367,7 @@
|
|
3367 |
"model.layers.26.self_attn.v_proj": {
|
3368 |
"bias": true,
|
3369 |
"enable_norm": true,
|
3370 |
-
"enable_perm":
|
3371 |
"group_num": 1,
|
3372 |
"group_size": 5120,
|
3373 |
"in_features": 5120,
|
@@ -3391,7 +3391,7 @@
|
|
3391 |
"model.layers.27.mlp.down_proj": {
|
3392 |
"bias": null,
|
3393 |
"enable_norm": true,
|
3394 |
-
"enable_perm":
|
3395 |
"group_num": 1,
|
3396 |
"group_size": 13824,
|
3397 |
"in_features": 13824,
|
@@ -3415,7 +3415,7 @@
|
|
3415 |
"model.layers.27.mlp.gate_proj": {
|
3416 |
"bias": null,
|
3417 |
"enable_norm": true,
|
3418 |
-
"enable_perm":
|
3419 |
"group_num": 1,
|
3420 |
"group_size": 5120,
|
3421 |
"in_features": 5120,
|
@@ -3439,7 +3439,7 @@
|
|
3439 |
"model.layers.27.mlp.up_proj": {
|
3440 |
"bias": null,
|
3441 |
"enable_norm": true,
|
3442 |
-
"enable_perm":
|
3443 |
"group_num": 1,
|
3444 |
"group_size": 5120,
|
3445 |
"in_features": 5120,
|
@@ -3463,7 +3463,7 @@
|
|
3463 |
"model.layers.27.self_attn.k_proj": {
|
3464 |
"bias": true,
|
3465 |
"enable_norm": true,
|
3466 |
-
"enable_perm":
|
3467 |
"group_num": 1,
|
3468 |
"group_size": 5120,
|
3469 |
"in_features": 5120,
|
@@ -3487,7 +3487,7 @@
|
|
3487 |
"model.layers.27.self_attn.o_proj": {
|
3488 |
"bias": null,
|
3489 |
"enable_norm": true,
|
3490 |
-
"enable_perm":
|
3491 |
"group_num": 1,
|
3492 |
"group_size": 5120,
|
3493 |
"in_features": 5120,
|
@@ -3511,7 +3511,7 @@
|
|
3511 |
"model.layers.27.self_attn.q_proj": {
|
3512 |
"bias": true,
|
3513 |
"enable_norm": true,
|
3514 |
-
"enable_perm":
|
3515 |
"group_num": 1,
|
3516 |
"group_size": 5120,
|
3517 |
"in_features": 5120,
|
@@ -3535,7 +3535,7 @@
|
|
3535 |
"model.layers.27.self_attn.v_proj": {
|
3536 |
"bias": true,
|
3537 |
"enable_norm": true,
|
3538 |
-
"enable_perm":
|
3539 |
"group_num": 1,
|
3540 |
"group_size": 5120,
|
3541 |
"in_features": 5120,
|
@@ -3559,7 +3559,7 @@
|
|
3559 |
"model.layers.28.mlp.down_proj": {
|
3560 |
"bias": null,
|
3561 |
"enable_norm": true,
|
3562 |
-
"enable_perm":
|
3563 |
"group_num": 1,
|
3564 |
"group_size": 13824,
|
3565 |
"in_features": 13824,
|
@@ -3583,7 +3583,7 @@
|
|
3583 |
"model.layers.28.mlp.gate_proj": {
|
3584 |
"bias": null,
|
3585 |
"enable_norm": true,
|
3586 |
-
"enable_perm":
|
3587 |
"group_num": 1,
|
3588 |
"group_size": 5120,
|
3589 |
"in_features": 5120,
|
@@ -3607,7 +3607,7 @@
|
|
3607 |
"model.layers.28.mlp.up_proj": {
|
3608 |
"bias": null,
|
3609 |
"enable_norm": true,
|
3610 |
-
"enable_perm":
|
3611 |
"group_num": 1,
|
3612 |
"group_size": 5120,
|
3613 |
"in_features": 5120,
|
@@ -3631,7 +3631,7 @@
|
|
3631 |
"model.layers.28.self_attn.k_proj": {
|
3632 |
"bias": true,
|
3633 |
"enable_norm": true,
|
3634 |
-
"enable_perm":
|
3635 |
"group_num": 1,
|
3636 |
"group_size": 5120,
|
3637 |
"in_features": 5120,
|
@@ -3655,7 +3655,7 @@
|
|
3655 |
"model.layers.28.self_attn.o_proj": {
|
3656 |
"bias": null,
|
3657 |
"enable_norm": true,
|
3658 |
-
"enable_perm":
|
3659 |
"group_num": 1,
|
3660 |
"group_size": 5120,
|
3661 |
"in_features": 5120,
|
@@ -3679,7 +3679,7 @@
|
|
3679 |
"model.layers.28.self_attn.q_proj": {
|
3680 |
"bias": true,
|
3681 |
"enable_norm": true,
|
3682 |
-
"enable_perm":
|
3683 |
"group_num": 1,
|
3684 |
"group_size": 5120,
|
3685 |
"in_features": 5120,
|
@@ -3703,7 +3703,7 @@
|
|
3703 |
"model.layers.28.self_attn.v_proj": {
|
3704 |
"bias": true,
|
3705 |
"enable_norm": true,
|
3706 |
-
"enable_perm":
|
3707 |
"group_num": 1,
|
3708 |
"group_size": 5120,
|
3709 |
"in_features": 5120,
|
@@ -3727,7 +3727,7 @@
|
|
3727 |
"model.layers.29.mlp.down_proj": {
|
3728 |
"bias": null,
|
3729 |
"enable_norm": true,
|
3730 |
-
"enable_perm":
|
3731 |
"group_num": 1,
|
3732 |
"group_size": 13824,
|
3733 |
"in_features": 13824,
|
@@ -3751,7 +3751,7 @@
|
|
3751 |
"model.layers.29.mlp.gate_proj": {
|
3752 |
"bias": null,
|
3753 |
"enable_norm": true,
|
3754 |
-
"enable_perm":
|
3755 |
"group_num": 1,
|
3756 |
"group_size": 5120,
|
3757 |
"in_features": 5120,
|
@@ -3775,7 +3775,7 @@
|
|
3775 |
"model.layers.29.mlp.up_proj": {
|
3776 |
"bias": null,
|
3777 |
"enable_norm": true,
|
3778 |
-
"enable_perm":
|
3779 |
"group_num": 1,
|
3780 |
"group_size": 5120,
|
3781 |
"in_features": 5120,
|
@@ -3799,7 +3799,7 @@
|
|
3799 |
"model.layers.29.self_attn.k_proj": {
|
3800 |
"bias": true,
|
3801 |
"enable_norm": true,
|
3802 |
-
"enable_perm":
|
3803 |
"group_num": 1,
|
3804 |
"group_size": 5120,
|
3805 |
"in_features": 5120,
|
@@ -3823,7 +3823,7 @@
|
|
3823 |
"model.layers.29.self_attn.o_proj": {
|
3824 |
"bias": null,
|
3825 |
"enable_norm": true,
|
3826 |
-
"enable_perm":
|
3827 |
"group_num": 1,
|
3828 |
"group_size": 5120,
|
3829 |
"in_features": 5120,
|
@@ -3847,7 +3847,7 @@
|
|
3847 |
"model.layers.29.self_attn.q_proj": {
|
3848 |
"bias": true,
|
3849 |
"enable_norm": true,
|
3850 |
-
"enable_perm":
|
3851 |
"group_num": 1,
|
3852 |
"group_size": 5120,
|
3853 |
"in_features": 5120,
|
@@ -3871,7 +3871,7 @@
|
|
3871 |
"model.layers.29.self_attn.v_proj": {
|
3872 |
"bias": true,
|
3873 |
"enable_norm": true,
|
3874 |
-
"enable_perm":
|
3875 |
"group_num": 1,
|
3876 |
"group_size": 5120,
|
3877 |
"in_features": 5120,
|
@@ -3895,7 +3895,7 @@
|
|
3895 |
"model.layers.3.mlp.down_proj": {
|
3896 |
"bias": null,
|
3897 |
"enable_norm": true,
|
3898 |
-
"enable_perm":
|
3899 |
"group_num": 1,
|
3900 |
"group_size": 13824,
|
3901 |
"in_features": 13824,
|
@@ -3919,7 +3919,7 @@
|
|
3919 |
"model.layers.3.mlp.gate_proj": {
|
3920 |
"bias": null,
|
3921 |
"enable_norm": true,
|
3922 |
-
"enable_perm":
|
3923 |
"group_num": 1,
|
3924 |
"group_size": 5120,
|
3925 |
"in_features": 5120,
|
@@ -3943,7 +3943,7 @@
|
|
3943 |
"model.layers.3.mlp.up_proj": {
|
3944 |
"bias": null,
|
3945 |
"enable_norm": true,
|
3946 |
-
"enable_perm":
|
3947 |
"group_num": 1,
|
3948 |
"group_size": 5120,
|
3949 |
"in_features": 5120,
|
@@ -3967,7 +3967,7 @@
|
|
3967 |
"model.layers.3.self_attn.k_proj": {
|
3968 |
"bias": true,
|
3969 |
"enable_norm": true,
|
3970 |
-
"enable_perm":
|
3971 |
"group_num": 1,
|
3972 |
"group_size": 5120,
|
3973 |
"in_features": 5120,
|
@@ -3991,7 +3991,7 @@
|
|
3991 |
"model.layers.3.self_attn.o_proj": {
|
3992 |
"bias": null,
|
3993 |
"enable_norm": true,
|
3994 |
-
"enable_perm":
|
3995 |
"group_num": 1,
|
3996 |
"group_size": 5120,
|
3997 |
"in_features": 5120,
|
@@ -4015,7 +4015,7 @@
|
|
4015 |
"model.layers.3.self_attn.q_proj": {
|
4016 |
"bias": true,
|
4017 |
"enable_norm": true,
|
4018 |
-
"enable_perm":
|
4019 |
"group_num": 1,
|
4020 |
"group_size": 5120,
|
4021 |
"in_features": 5120,
|
@@ -4039,7 +4039,7 @@
|
|
4039 |
"model.layers.3.self_attn.v_proj": {
|
4040 |
"bias": true,
|
4041 |
"enable_norm": true,
|
4042 |
-
"enable_perm":
|
4043 |
"group_num": 1,
|
4044 |
"group_size": 5120,
|
4045 |
"in_features": 5120,
|
@@ -4063,7 +4063,7 @@
|
|
4063 |
"model.layers.30.mlp.down_proj": {
|
4064 |
"bias": null,
|
4065 |
"enable_norm": true,
|
4066 |
-
"enable_perm":
|
4067 |
"group_num": 1,
|
4068 |
"group_size": 13824,
|
4069 |
"in_features": 13824,
|
@@ -4087,7 +4087,7 @@
|
|
4087 |
"model.layers.30.mlp.gate_proj": {
|
4088 |
"bias": null,
|
4089 |
"enable_norm": true,
|
4090 |
-
"enable_perm":
|
4091 |
"group_num": 1,
|
4092 |
"group_size": 5120,
|
4093 |
"in_features": 5120,
|
@@ -4111,7 +4111,7 @@
|
|
4111 |
"model.layers.30.mlp.up_proj": {
|
4112 |
"bias": null,
|
4113 |
"enable_norm": true,
|
4114 |
-
"enable_perm":
|
4115 |
"group_num": 1,
|
4116 |
"group_size": 5120,
|
4117 |
"in_features": 5120,
|
@@ -4135,7 +4135,7 @@
|
|
4135 |
"model.layers.30.self_attn.k_proj": {
|
4136 |
"bias": true,
|
4137 |
"enable_norm": true,
|
4138 |
-
"enable_perm":
|
4139 |
"group_num": 1,
|
4140 |
"group_size": 5120,
|
4141 |
"in_features": 5120,
|
@@ -4159,7 +4159,7 @@
|
|
4159 |
"model.layers.30.self_attn.o_proj": {
|
4160 |
"bias": null,
|
4161 |
"enable_norm": true,
|
4162 |
-
"enable_perm":
|
4163 |
"group_num": 1,
|
4164 |
"group_size": 5120,
|
4165 |
"in_features": 5120,
|
@@ -4183,7 +4183,7 @@
|
|
4183 |
"model.layers.30.self_attn.q_proj": {
|
4184 |
"bias": true,
|
4185 |
"enable_norm": true,
|
4186 |
-
"enable_perm":
|
4187 |
"group_num": 1,
|
4188 |
"group_size": 5120,
|
4189 |
"in_features": 5120,
|
@@ -4207,7 +4207,7 @@
|
|
4207 |
"model.layers.30.self_attn.v_proj": {
|
4208 |
"bias": true,
|
4209 |
"enable_norm": true,
|
4210 |
-
"enable_perm":
|
4211 |
"group_num": 1,
|
4212 |
"group_size": 5120,
|
4213 |
"in_features": 5120,
|
@@ -4231,7 +4231,7 @@
|
|
4231 |
"model.layers.31.mlp.down_proj": {
|
4232 |
"bias": null,
|
4233 |
"enable_norm": true,
|
4234 |
-
"enable_perm":
|
4235 |
"group_num": 1,
|
4236 |
"group_size": 13824,
|
4237 |
"in_features": 13824,
|
@@ -4255,7 +4255,7 @@
|
|
4255 |
"model.layers.31.mlp.gate_proj": {
|
4256 |
"bias": null,
|
4257 |
"enable_norm": true,
|
4258 |
-
"enable_perm":
|
4259 |
"group_num": 1,
|
4260 |
"group_size": 5120,
|
4261 |
"in_features": 5120,
|
@@ -4279,7 +4279,7 @@
|
|
4279 |
"model.layers.31.mlp.up_proj": {
|
4280 |
"bias": null,
|
4281 |
"enable_norm": true,
|
4282 |
-
"enable_perm":
|
4283 |
"group_num": 1,
|
4284 |
"group_size": 5120,
|
4285 |
"in_features": 5120,
|
@@ -4303,7 +4303,7 @@
|
|
4303 |
"model.layers.31.self_attn.k_proj": {
|
4304 |
"bias": true,
|
4305 |
"enable_norm": true,
|
4306 |
-
"enable_perm":
|
4307 |
"group_num": 1,
|
4308 |
"group_size": 5120,
|
4309 |
"in_features": 5120,
|
@@ -4327,7 +4327,7 @@
|
|
4327 |
"model.layers.31.self_attn.o_proj": {
|
4328 |
"bias": null,
|
4329 |
"enable_norm": true,
|
4330 |
-
"enable_perm":
|
4331 |
"group_num": 1,
|
4332 |
"group_size": 5120,
|
4333 |
"in_features": 5120,
|
@@ -4351,7 +4351,7 @@
|
|
4351 |
"model.layers.31.self_attn.q_proj": {
|
4352 |
"bias": true,
|
4353 |
"enable_norm": true,
|
4354 |
-
"enable_perm":
|
4355 |
"group_num": 1,
|
4356 |
"group_size": 5120,
|
4357 |
"in_features": 5120,
|
@@ -4375,7 +4375,7 @@
|
|
4375 |
"model.layers.31.self_attn.v_proj": {
|
4376 |
"bias": true,
|
4377 |
"enable_norm": true,
|
4378 |
-
"enable_perm":
|
4379 |
"group_num": 1,
|
4380 |
"group_size": 5120,
|
4381 |
"in_features": 5120,
|
@@ -4399,7 +4399,7 @@
|
|
4399 |
"model.layers.32.mlp.down_proj": {
|
4400 |
"bias": null,
|
4401 |
"enable_norm": true,
|
4402 |
-
"enable_perm":
|
4403 |
"group_num": 1,
|
4404 |
"group_size": 13824,
|
4405 |
"in_features": 13824,
|
@@ -4423,7 +4423,7 @@
|
|
4423 |
"model.layers.32.mlp.gate_proj": {
|
4424 |
"bias": null,
|
4425 |
"enable_norm": true,
|
4426 |
-
"enable_perm":
|
4427 |
"group_num": 1,
|
4428 |
"group_size": 5120,
|
4429 |
"in_features": 5120,
|
@@ -4447,7 +4447,7 @@
|
|
4447 |
"model.layers.32.mlp.up_proj": {
|
4448 |
"bias": null,
|
4449 |
"enable_norm": true,
|
4450 |
-
"enable_perm":
|
4451 |
"group_num": 1,
|
4452 |
"group_size": 5120,
|
4453 |
"in_features": 5120,
|
@@ -4471,7 +4471,7 @@
|
|
4471 |
"model.layers.32.self_attn.k_proj": {
|
4472 |
"bias": true,
|
4473 |
"enable_norm": true,
|
4474 |
-
"enable_perm":
|
4475 |
"group_num": 1,
|
4476 |
"group_size": 5120,
|
4477 |
"in_features": 5120,
|
@@ -4495,7 +4495,7 @@
|
|
4495 |
"model.layers.32.self_attn.o_proj": {
|
4496 |
"bias": null,
|
4497 |
"enable_norm": true,
|
4498 |
-
"enable_perm":
|
4499 |
"group_num": 1,
|
4500 |
"group_size": 5120,
|
4501 |
"in_features": 5120,
|
@@ -4519,7 +4519,7 @@
|
|
4519 |
"model.layers.32.self_attn.q_proj": {
|
4520 |
"bias": true,
|
4521 |
"enable_norm": true,
|
4522 |
-
"enable_perm":
|
4523 |
"group_num": 1,
|
4524 |
"group_size": 5120,
|
4525 |
"in_features": 5120,
|
@@ -4543,7 +4543,7 @@
|
|
4543 |
"model.layers.32.self_attn.v_proj": {
|
4544 |
"bias": true,
|
4545 |
"enable_norm": true,
|
4546 |
-
"enable_perm":
|
4547 |
"group_num": 1,
|
4548 |
"group_size": 5120,
|
4549 |
"in_features": 5120,
|
@@ -4567,7 +4567,7 @@
|
|
4567 |
"model.layers.33.mlp.down_proj": {
|
4568 |
"bias": null,
|
4569 |
"enable_norm": true,
|
4570 |
-
"enable_perm":
|
4571 |
"group_num": 1,
|
4572 |
"group_size": 13824,
|
4573 |
"in_features": 13824,
|
@@ -4591,7 +4591,7 @@
|
|
4591 |
"model.layers.33.mlp.gate_proj": {
|
4592 |
"bias": null,
|
4593 |
"enable_norm": true,
|
4594 |
-
"enable_perm":
|
4595 |
"group_num": 1,
|
4596 |
"group_size": 5120,
|
4597 |
"in_features": 5120,
|
@@ -4615,7 +4615,7 @@
|
|
4615 |
"model.layers.33.mlp.up_proj": {
|
4616 |
"bias": null,
|
4617 |
"enable_norm": true,
|
4618 |
-
"enable_perm":
|
4619 |
"group_num": 1,
|
4620 |
"group_size": 5120,
|
4621 |
"in_features": 5120,
|
@@ -4639,7 +4639,7 @@
|
|
4639 |
"model.layers.33.self_attn.k_proj": {
|
4640 |
"bias": true,
|
4641 |
"enable_norm": true,
|
4642 |
-
"enable_perm":
|
4643 |
"group_num": 1,
|
4644 |
"group_size": 5120,
|
4645 |
"in_features": 5120,
|
@@ -4663,7 +4663,7 @@
|
|
4663 |
"model.layers.33.self_attn.o_proj": {
|
4664 |
"bias": null,
|
4665 |
"enable_norm": true,
|
4666 |
-
"enable_perm":
|
4667 |
"group_num": 1,
|
4668 |
"group_size": 5120,
|
4669 |
"in_features": 5120,
|
@@ -4687,7 +4687,7 @@
|
|
4687 |
"model.layers.33.self_attn.q_proj": {
|
4688 |
"bias": true,
|
4689 |
"enable_norm": true,
|
4690 |
-
"enable_perm":
|
4691 |
"group_num": 1,
|
4692 |
"group_size": 5120,
|
4693 |
"in_features": 5120,
|
@@ -4711,7 +4711,7 @@
|
|
4711 |
"model.layers.33.self_attn.v_proj": {
|
4712 |
"bias": true,
|
4713 |
"enable_norm": true,
|
4714 |
-
"enable_perm":
|
4715 |
"group_num": 1,
|
4716 |
"group_size": 5120,
|
4717 |
"in_features": 5120,
|
@@ -4735,7 +4735,7 @@
|
|
4735 |
"model.layers.34.mlp.down_proj": {
|
4736 |
"bias": null,
|
4737 |
"enable_norm": true,
|
4738 |
-
"enable_perm":
|
4739 |
"group_num": 1,
|
4740 |
"group_size": 13824,
|
4741 |
"in_features": 13824,
|
@@ -4759,7 +4759,7 @@
|
|
4759 |
"model.layers.34.mlp.gate_proj": {
|
4760 |
"bias": null,
|
4761 |
"enable_norm": true,
|
4762 |
-
"enable_perm":
|
4763 |
"group_num": 1,
|
4764 |
"group_size": 5120,
|
4765 |
"in_features": 5120,
|
@@ -4783,7 +4783,7 @@
|
|
4783 |
"model.layers.34.mlp.up_proj": {
|
4784 |
"bias": null,
|
4785 |
"enable_norm": true,
|
4786 |
-
"enable_perm":
|
4787 |
"group_num": 1,
|
4788 |
"group_size": 5120,
|
4789 |
"in_features": 5120,
|
@@ -4807,7 +4807,7 @@
|
|
4807 |
"model.layers.34.self_attn.k_proj": {
|
4808 |
"bias": true,
|
4809 |
"enable_norm": true,
|
4810 |
-
"enable_perm":
|
4811 |
"group_num": 1,
|
4812 |
"group_size": 5120,
|
4813 |
"in_features": 5120,
|
@@ -4831,7 +4831,7 @@
|
|
4831 |
"model.layers.34.self_attn.o_proj": {
|
4832 |
"bias": null,
|
4833 |
"enable_norm": true,
|
4834 |
-
"enable_perm":
|
4835 |
"group_num": 1,
|
4836 |
"group_size": 5120,
|
4837 |
"in_features": 5120,
|
@@ -4855,7 +4855,7 @@
|
|
4855 |
"model.layers.34.self_attn.q_proj": {
|
4856 |
"bias": true,
|
4857 |
"enable_norm": true,
|
4858 |
-
"enable_perm":
|
4859 |
"group_num": 1,
|
4860 |
"group_size": 5120,
|
4861 |
"in_features": 5120,
|
@@ -4879,7 +4879,7 @@
|
|
4879 |
"model.layers.34.self_attn.v_proj": {
|
4880 |
"bias": true,
|
4881 |
"enable_norm": true,
|
4882 |
-
"enable_perm":
|
4883 |
"group_num": 1,
|
4884 |
"group_size": 5120,
|
4885 |
"in_features": 5120,
|
@@ -4903,7 +4903,7 @@
|
|
4903 |
"model.layers.35.mlp.down_proj": {
|
4904 |
"bias": null,
|
4905 |
"enable_norm": true,
|
4906 |
-
"enable_perm":
|
4907 |
"group_num": 1,
|
4908 |
"group_size": 13824,
|
4909 |
"in_features": 13824,
|
@@ -4927,7 +4927,7 @@
|
|
4927 |
"model.layers.35.mlp.gate_proj": {
|
4928 |
"bias": null,
|
4929 |
"enable_norm": true,
|
4930 |
-
"enable_perm":
|
4931 |
"group_num": 1,
|
4932 |
"group_size": 5120,
|
4933 |
"in_features": 5120,
|
@@ -4951,7 +4951,7 @@
|
|
4951 |
"model.layers.35.mlp.up_proj": {
|
4952 |
"bias": null,
|
4953 |
"enable_norm": true,
|
4954 |
-
"enable_perm":
|
4955 |
"group_num": 1,
|
4956 |
"group_size": 5120,
|
4957 |
"in_features": 5120,
|
@@ -4975,7 +4975,7 @@
|
|
4975 |
"model.layers.35.self_attn.k_proj": {
|
4976 |
"bias": true,
|
4977 |
"enable_norm": true,
|
4978 |
-
"enable_perm":
|
4979 |
"group_num": 1,
|
4980 |
"group_size": 5120,
|
4981 |
"in_features": 5120,
|
@@ -4999,7 +4999,7 @@
|
|
4999 |
"model.layers.35.self_attn.o_proj": {
|
5000 |
"bias": null,
|
5001 |
"enable_norm": true,
|
5002 |
-
"enable_perm":
|
5003 |
"group_num": 1,
|
5004 |
"group_size": 5120,
|
5005 |
"in_features": 5120,
|
@@ -5023,7 +5023,7 @@
|
|
5023 |
"model.layers.35.self_attn.q_proj": {
|
5024 |
"bias": true,
|
5025 |
"enable_norm": true,
|
5026 |
-
"enable_perm":
|
5027 |
"group_num": 1,
|
5028 |
"group_size": 5120,
|
5029 |
"in_features": 5120,
|
@@ -5047,7 +5047,7 @@
|
|
5047 |
"model.layers.35.self_attn.v_proj": {
|
5048 |
"bias": true,
|
5049 |
"enable_norm": true,
|
5050 |
-
"enable_perm":
|
5051 |
"group_num": 1,
|
5052 |
"group_size": 5120,
|
5053 |
"in_features": 5120,
|
@@ -5071,7 +5071,7 @@
|
|
5071 |
"model.layers.36.mlp.down_proj": {
|
5072 |
"bias": null,
|
5073 |
"enable_norm": true,
|
5074 |
-
"enable_perm":
|
5075 |
"group_num": 1,
|
5076 |
"group_size": 13824,
|
5077 |
"in_features": 13824,
|
@@ -5095,7 +5095,7 @@
|
|
5095 |
"model.layers.36.mlp.gate_proj": {
|
5096 |
"bias": null,
|
5097 |
"enable_norm": true,
|
5098 |
-
"enable_perm":
|
5099 |
"group_num": 1,
|
5100 |
"group_size": 5120,
|
5101 |
"in_features": 5120,
|
@@ -5119,7 +5119,7 @@
|
|
5119 |
"model.layers.36.mlp.up_proj": {
|
5120 |
"bias": null,
|
5121 |
"enable_norm": true,
|
5122 |
-
"enable_perm":
|
5123 |
"group_num": 1,
|
5124 |
"group_size": 5120,
|
5125 |
"in_features": 5120,
|
@@ -5143,7 +5143,7 @@
|
|
5143 |
"model.layers.36.self_attn.k_proj": {
|
5144 |
"bias": true,
|
5145 |
"enable_norm": true,
|
5146 |
-
"enable_perm":
|
5147 |
"group_num": 1,
|
5148 |
"group_size": 5120,
|
5149 |
"in_features": 5120,
|
@@ -5167,7 +5167,7 @@
|
|
5167 |
"model.layers.36.self_attn.o_proj": {
|
5168 |
"bias": null,
|
5169 |
"enable_norm": true,
|
5170 |
-
"enable_perm":
|
5171 |
"group_num": 1,
|
5172 |
"group_size": 5120,
|
5173 |
"in_features": 5120,
|
@@ -5191,7 +5191,7 @@
|
|
5191 |
"model.layers.36.self_attn.q_proj": {
|
5192 |
"bias": true,
|
5193 |
"enable_norm": true,
|
5194 |
-
"enable_perm":
|
5195 |
"group_num": 1,
|
5196 |
"group_size": 5120,
|
5197 |
"in_features": 5120,
|
@@ -5215,7 +5215,7 @@
|
|
5215 |
"model.layers.36.self_attn.v_proj": {
|
5216 |
"bias": true,
|
5217 |
"enable_norm": true,
|
5218 |
-
"enable_perm":
|
5219 |
"group_num": 1,
|
5220 |
"group_size": 5120,
|
5221 |
"in_features": 5120,
|
@@ -5239,7 +5239,7 @@
|
|
5239 |
"model.layers.37.mlp.down_proj": {
|
5240 |
"bias": null,
|
5241 |
"enable_norm": true,
|
5242 |
-
"enable_perm":
|
5243 |
"group_num": 1,
|
5244 |
"group_size": 13824,
|
5245 |
"in_features": 13824,
|
@@ -5263,7 +5263,7 @@
|
|
5263 |
"model.layers.37.mlp.gate_proj": {
|
5264 |
"bias": null,
|
5265 |
"enable_norm": true,
|
5266 |
-
"enable_perm":
|
5267 |
"group_num": 1,
|
5268 |
"group_size": 5120,
|
5269 |
"in_features": 5120,
|
@@ -5287,7 +5287,7 @@
|
|
5287 |
"model.layers.37.mlp.up_proj": {
|
5288 |
"bias": null,
|
5289 |
"enable_norm": true,
|
5290 |
-
"enable_perm":
|
5291 |
"group_num": 1,
|
5292 |
"group_size": 5120,
|
5293 |
"in_features": 5120,
|
@@ -5311,7 +5311,7 @@
|
|
5311 |
"model.layers.37.self_attn.k_proj": {
|
5312 |
"bias": true,
|
5313 |
"enable_norm": true,
|
5314 |
-
"enable_perm":
|
5315 |
"group_num": 1,
|
5316 |
"group_size": 5120,
|
5317 |
"in_features": 5120,
|
@@ -5335,7 +5335,7 @@
|
|
5335 |
"model.layers.37.self_attn.o_proj": {
|
5336 |
"bias": null,
|
5337 |
"enable_norm": true,
|
5338 |
-
"enable_perm":
|
5339 |
"group_num": 1,
|
5340 |
"group_size": 5120,
|
5341 |
"in_features": 5120,
|
@@ -5359,7 +5359,7 @@
|
|
5359 |
"model.layers.37.self_attn.q_proj": {
|
5360 |
"bias": true,
|
5361 |
"enable_norm": true,
|
5362 |
-
"enable_perm":
|
5363 |
"group_num": 1,
|
5364 |
"group_size": 5120,
|
5365 |
"in_features": 5120,
|
@@ -5383,7 +5383,7 @@
|
|
5383 |
"model.layers.37.self_attn.v_proj": {
|
5384 |
"bias": true,
|
5385 |
"enable_norm": true,
|
5386 |
-
"enable_perm":
|
5387 |
"group_num": 1,
|
5388 |
"group_size": 5120,
|
5389 |
"in_features": 5120,
|
@@ -5407,7 +5407,7 @@
|
|
5407 |
"model.layers.38.mlp.down_proj": {
|
5408 |
"bias": null,
|
5409 |
"enable_norm": true,
|
5410 |
-
"enable_perm":
|
5411 |
"group_num": 1,
|
5412 |
"group_size": 13824,
|
5413 |
"in_features": 13824,
|
@@ -5431,7 +5431,7 @@
|
|
5431 |
"model.layers.38.mlp.gate_proj": {
|
5432 |
"bias": null,
|
5433 |
"enable_norm": true,
|
5434 |
-
"enable_perm":
|
5435 |
"group_num": 1,
|
5436 |
"group_size": 5120,
|
5437 |
"in_features": 5120,
|
@@ -5455,7 +5455,7 @@
|
|
5455 |
"model.layers.38.mlp.up_proj": {
|
5456 |
"bias": null,
|
5457 |
"enable_norm": true,
|
5458 |
-
"enable_perm":
|
5459 |
"group_num": 1,
|
5460 |
"group_size": 5120,
|
5461 |
"in_features": 5120,
|
@@ -5479,7 +5479,7 @@
|
|
5479 |
"model.layers.38.self_attn.k_proj": {
|
5480 |
"bias": true,
|
5481 |
"enable_norm": true,
|
5482 |
-
"enable_perm":
|
5483 |
"group_num": 1,
|
5484 |
"group_size": 5120,
|
5485 |
"in_features": 5120,
|
@@ -5503,7 +5503,7 @@
|
|
5503 |
"model.layers.38.self_attn.o_proj": {
|
5504 |
"bias": null,
|
5505 |
"enable_norm": true,
|
5506 |
-
"enable_perm":
|
5507 |
"group_num": 1,
|
5508 |
"group_size": 5120,
|
5509 |
"in_features": 5120,
|
@@ -5527,7 +5527,7 @@
|
|
5527 |
"model.layers.38.self_attn.q_proj": {
|
5528 |
"bias": true,
|
5529 |
"enable_norm": true,
|
5530 |
-
"enable_perm":
|
5531 |
"group_num": 1,
|
5532 |
"group_size": 5120,
|
5533 |
"in_features": 5120,
|
@@ -5551,7 +5551,7 @@
     "model.layers.38.self_attn.v_proj": {
       "bias": true,
       "enable_norm": true,
-      "enable_perm": true,
+      "enable_perm": false,
       "group_num": 1,
       "group_size": 5120,
       "in_features": 5120,
@@ -5575,7 +5575,7 @@
     "model.layers.39.mlp.down_proj": {
       "bias": null,
       "enable_norm": true,
-      "enable_perm": true,
+      "enable_perm": false,
       "group_num": 1,
       "group_size": 13824,
       "in_features": 13824,
[Identical hunks follow at 24-line intervals, @@ -5599,7 +5599,7 @@ through @@ -8071,7 +8071,7 @@, for every remaining projection of model.layers.39 and for all seven projections (mlp.down_proj, mlp.gate_proj, mlp.up_proj, self_attn.k_proj, self_attn.o_proj, self_attn.q_proj, self_attn.v_proj) of model.layers.4, 40-47, and 5-9. Every hunk makes the same one-line change, "enable_perm": true -> "enable_perm": false; the context lines vary only in the module name, in "bias" (true for q/k/v projections, null otherwise), and in "group_size"/"in_features" (13824 for mlp.down_proj, 5120 for all other projections).]
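Since the commit body is 336 copies of the same one-line edit, the change is easier to read (and to reproduce or verify) as a script. Below is a minimal Python sketch, not the tooling actually used for this commit: disable_perm is a hypothetical helper, and it walks the parsed JSON recursively because this diff excerpt does not show under which top-level key the per-layer quantization entries are nested.

import json

def disable_perm(node):
    # Hypothetical helper: recursively set "enable_perm" to False in
    # every dict that carries the key; returns how many were changed.
    changed = 0
    if isinstance(node, dict):
        if "enable_perm" in node:
            node["enable_perm"] = False
            changed += 1
        for value in node.values():
            changed += disable_perm(value)
    elif isinstance(node, list):
        for item in node:
            changed += disable_perm(item)
    return changed

with open("config.json") as fh:
    config = json.load(fh)

# 48 layers x 7 projections = 336, matching the commit's +336 -336 stat.
assert disable_perm(config) == 336

with open("config.json", "w") as fh:
    json.dump(config, fh, indent=2)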
  31      "model.layers.0.mlp.down_proj": {
  32        "bias": null,
  33        "enable_norm": true,
  34 +      "enable_perm": false,
  35        "group_num": 1,
  36        "group_size": 13824,
  37        "in_features": 13824,
[The new-file side of the diff repeats this block for every quantized projection entry of model.layers.0, 1, 10-19, 2, 20-29, 3, 30, and 31, in the file's lexicographic order, continuing in the same pattern past model.layers.31.self_attn.k_proj. In each block the only marked line is the added "enable_perm": false,; the context lines vary exactly as on the old-file side above.]
|
4306 |
+
"enable_perm": false,
|
4307 |
"group_num": 1,
|
4308 |
"group_size": 5120,
|
4309 |
"in_features": 5120,
|
|
|
4327 |
"model.layers.31.self_attn.o_proj": {
|
4328 |
"bias": null,
|
4329 |
"enable_norm": true,
|
4330 |
+
"enable_perm": false,
|
4331 |
"group_num": 1,
|
4332 |
"group_size": 5120,
|
4333 |
"in_features": 5120,
|
|
|
4351 |
"model.layers.31.self_attn.q_proj": {
|
4352 |
"bias": true,
|
4353 |
"enable_norm": true,
|
4354 |
+
"enable_perm": false,
|
4355 |
"group_num": 1,
|
4356 |
"group_size": 5120,
|
4357 |
"in_features": 5120,
|
|
|
4375 |
"model.layers.31.self_attn.v_proj": {
|
4376 |
"bias": true,
|
4377 |
"enable_norm": true,
|
4378 |
+
"enable_perm": false,
|
4379 |
"group_num": 1,
|
4380 |
"group_size": 5120,
|
4381 |
"in_features": 5120,
|
|
|
4399 |
"model.layers.32.mlp.down_proj": {
|
4400 |
"bias": null,
|
4401 |
"enable_norm": true,
|
4402 |
+
"enable_perm": false,
|
4403 |
"group_num": 1,
|
4404 |
"group_size": 13824,
|
4405 |
"in_features": 13824,
|
|
|
4423 |
"model.layers.32.mlp.gate_proj": {
|
4424 |
"bias": null,
|
4425 |
"enable_norm": true,
|
4426 |
+
"enable_perm": false,
|
4427 |
"group_num": 1,
|
4428 |
"group_size": 5120,
|
4429 |
"in_features": 5120,
|
|
|
4447 |
"model.layers.32.mlp.up_proj": {
|
4448 |
"bias": null,
|
4449 |
"enable_norm": true,
|
4450 |
+
"enable_perm": false,
|
4451 |
"group_num": 1,
|
4452 |
"group_size": 5120,
|
4453 |
"in_features": 5120,
|
|
|
4471 |
"model.layers.32.self_attn.k_proj": {
|
4472 |
"bias": true,
|
4473 |
"enable_norm": true,
|
4474 |
+
"enable_perm": false,
|
4475 |
"group_num": 1,
|
4476 |
"group_size": 5120,
|
4477 |
"in_features": 5120,
|
|
|
4495 |
"model.layers.32.self_attn.o_proj": {
|
4496 |
"bias": null,
|
4497 |
"enable_norm": true,
|
4498 |
+
"enable_perm": false,
|
4499 |
"group_num": 1,
|
4500 |
"group_size": 5120,
|
4501 |
"in_features": 5120,
|
|
|
4519 |
"model.layers.32.self_attn.q_proj": {
|
4520 |
"bias": true,
|
4521 |
"enable_norm": true,
|
4522 |
+
"enable_perm": false,
|
4523 |
"group_num": 1,
|
4524 |
"group_size": 5120,
|
4525 |
"in_features": 5120,
|
|
|
4543 |
"model.layers.32.self_attn.v_proj": {
|
4544 |
"bias": true,
|
4545 |
"enable_norm": true,
|
4546 |
+
"enable_perm": false,
|
4547 |
"group_num": 1,
|
4548 |
"group_size": 5120,
|
4549 |
"in_features": 5120,
|
|
|
4567 |
"model.layers.33.mlp.down_proj": {
|
4568 |
"bias": null,
|
4569 |
"enable_norm": true,
|
4570 |
+
"enable_perm": false,
|
4571 |
"group_num": 1,
|
4572 |
"group_size": 13824,
|
4573 |
"in_features": 13824,
|
|
|
4591 |
"model.layers.33.mlp.gate_proj": {
|
4592 |
"bias": null,
|
4593 |
"enable_norm": true,
|
4594 |
+
"enable_perm": false,
|
4595 |
"group_num": 1,
|
4596 |
"group_size": 5120,
|
4597 |
"in_features": 5120,
|
|
|
4615 |
"model.layers.33.mlp.up_proj": {
|
4616 |
"bias": null,
|
4617 |
"enable_norm": true,
|
4618 |
+
"enable_perm": false,
|
4619 |
"group_num": 1,
|
4620 |
"group_size": 5120,
|
4621 |
"in_features": 5120,
|
|
|
4639 |
"model.layers.33.self_attn.k_proj": {
|
4640 |
"bias": true,
|
4641 |
"enable_norm": true,
|
4642 |
+
"enable_perm": false,
|
4643 |
"group_num": 1,
|
4644 |
"group_size": 5120,
|
4645 |
"in_features": 5120,
|
|
|
4663 |
"model.layers.33.self_attn.o_proj": {
|
4664 |
"bias": null,
|
4665 |
"enable_norm": true,
|
4666 |
+
"enable_perm": false,
|
4667 |
"group_num": 1,
|
4668 |
"group_size": 5120,
|
4669 |
"in_features": 5120,
|
|
|
4687 |
"model.layers.33.self_attn.q_proj": {
|
4688 |
"bias": true,
|
4689 |
"enable_norm": true,
|
4690 |
+
"enable_perm": false,
|
4691 |
"group_num": 1,
|
4692 |
"group_size": 5120,
|
4693 |
"in_features": 5120,
|
|
|
4711 |
"model.layers.33.self_attn.v_proj": {
|
4712 |
"bias": true,
|
4713 |
"enable_norm": true,
|
4714 |
+
"enable_perm": false,
|
4715 |
"group_num": 1,
|
4716 |
"group_size": 5120,
|
4717 |
"in_features": 5120,
|
|
|
4735 |
"model.layers.34.mlp.down_proj": {
|
4736 |
"bias": null,
|
4737 |
"enable_norm": true,
|
4738 |
+
"enable_perm": false,
|
4739 |
"group_num": 1,
|
4740 |
"group_size": 13824,
|
4741 |
"in_features": 13824,
|
|
|
4759 |
"model.layers.34.mlp.gate_proj": {
|
4760 |
"bias": null,
|
4761 |
"enable_norm": true,
|
4762 |
+
"enable_perm": false,
|
4763 |
"group_num": 1,
|
4764 |
"group_size": 5120,
|
4765 |
"in_features": 5120,
|
|
|
4783 |
"model.layers.34.mlp.up_proj": {
|
4784 |
"bias": null,
|
4785 |
"enable_norm": true,
|
4786 |
+
"enable_perm": false,
|
4787 |
"group_num": 1,
|
4788 |
"group_size": 5120,
|
4789 |
"in_features": 5120,
|
|
|
4807 |
"model.layers.34.self_attn.k_proj": {
|
4808 |
"bias": true,
|
4809 |
"enable_norm": true,
|
4810 |
+
"enable_perm": false,
|
4811 |
"group_num": 1,
|
4812 |
"group_size": 5120,
|
4813 |
"in_features": 5120,
|
|
|
4831 |
"model.layers.34.self_attn.o_proj": {
|
4832 |
"bias": null,
|
4833 |
"enable_norm": true,
|
4834 |
+
"enable_perm": false,
|
4835 |
"group_num": 1,
|
4836 |
"group_size": 5120,
|
4837 |
"in_features": 5120,
|
|
|
4855 |
"model.layers.34.self_attn.q_proj": {
|
4856 |
"bias": true,
|
4857 |
"enable_norm": true,
|
4858 |
+
"enable_perm": false,
|
4859 |
"group_num": 1,
|
4860 |
"group_size": 5120,
|
4861 |
"in_features": 5120,
|
|
|
4879 |
"model.layers.34.self_attn.v_proj": {
|
4880 |
"bias": true,
|
4881 |
"enable_norm": true,
|
4882 |
+
"enable_perm": false,
|
4883 |
"group_num": 1,
|
4884 |
"group_size": 5120,
|
4885 |
"in_features": 5120,
|
|
|
4903 |
"model.layers.35.mlp.down_proj": {
|
4904 |
"bias": null,
|
4905 |
"enable_norm": true,
|
4906 |
+
"enable_perm": false,
|
4907 |
"group_num": 1,
|
4908 |
"group_size": 13824,
|
4909 |
"in_features": 13824,
|
|
|
4927 |
"model.layers.35.mlp.gate_proj": {
|
4928 |
"bias": null,
|
4929 |
"enable_norm": true,
|
4930 |
+
"enable_perm": false,
|
4931 |
"group_num": 1,
|
4932 |
"group_size": 5120,
|
4933 |
"in_features": 5120,
|
|
|
4951 |
"model.layers.35.mlp.up_proj": {
|
4952 |
"bias": null,
|
4953 |
"enable_norm": true,
|
4954 |
+
"enable_perm": false,
|
4955 |
"group_num": 1,
|
4956 |
"group_size": 5120,
|
4957 |
"in_features": 5120,
|
|
|
4975 |
"model.layers.35.self_attn.k_proj": {
|
4976 |
"bias": true,
|
4977 |
"enable_norm": true,
|
4978 |
+
"enable_perm": false,
|
4979 |
"group_num": 1,
|
4980 |
"group_size": 5120,
|
4981 |
"in_features": 5120,
|
|
|
4999 |
"model.layers.35.self_attn.o_proj": {
|
5000 |
"bias": null,
|
5001 |
"enable_norm": true,
|
5002 |
+
"enable_perm": false,
|
5003 |
"group_num": 1,
|
5004 |
"group_size": 5120,
|
5005 |
"in_features": 5120,
|
|
|
5023 |
"model.layers.35.self_attn.q_proj": {
|
5024 |
"bias": true,
|
5025 |
"enable_norm": true,
|
5026 |
+
"enable_perm": false,
|
5027 |
"group_num": 1,
|
5028 |
"group_size": 5120,
|
5029 |
"in_features": 5120,
|
|
|
5047 |
"model.layers.35.self_attn.v_proj": {
|
5048 |
"bias": true,
|
5049 |
"enable_norm": true,
|
5050 |
+
"enable_perm": false,
|
5051 |
"group_num": 1,
|
5052 |
"group_size": 5120,
|
5053 |
"in_features": 5120,
|
|
|
5071 |
"model.layers.36.mlp.down_proj": {
|
5072 |
"bias": null,
|
5073 |
"enable_norm": true,
|
5074 |
+
"enable_perm": false,
|
5075 |
"group_num": 1,
|
5076 |
"group_size": 13824,
|
5077 |
"in_features": 13824,
|
|
|
5095 |
"model.layers.36.mlp.gate_proj": {
|
5096 |
"bias": null,
|
5097 |
"enable_norm": true,
|
5098 |
+
"enable_perm": false,
|
5099 |
"group_num": 1,
|
5100 |
"group_size": 5120,
|
5101 |
"in_features": 5120,
|
|
|
5119 |
"model.layers.36.mlp.up_proj": {
|
5120 |
"bias": null,
|
5121 |
"enable_norm": true,
|
5122 |
+
"enable_perm": false,
|
5123 |
"group_num": 1,
|
5124 |
"group_size": 5120,
|
5125 |
"in_features": 5120,
|
|
|
5143 |
"model.layers.36.self_attn.k_proj": {
|
5144 |
"bias": true,
|
5145 |
"enable_norm": true,
|
5146 |
+
"enable_perm": false,
|
5147 |
"group_num": 1,
|
5148 |
"group_size": 5120,
|
5149 |
"in_features": 5120,
|
|
|
5167 |
"model.layers.36.self_attn.o_proj": {
|
5168 |
"bias": null,
|
5169 |
"enable_norm": true,
|
5170 |
+
"enable_perm": false,
|
5171 |
"group_num": 1,
|
5172 |
"group_size": 5120,
|
5173 |
"in_features": 5120,
|
|
|
5191 |
"model.layers.36.self_attn.q_proj": {
|
5192 |
"bias": true,
|
5193 |
"enable_norm": true,
|
5194 |
+
"enable_perm": false,
|
5195 |
"group_num": 1,
|
5196 |
"group_size": 5120,
|
5197 |
"in_features": 5120,
|
|
|
5215 |
"model.layers.36.self_attn.v_proj": {
|
5216 |
"bias": true,
|
5217 |
"enable_norm": true,
|
5218 |
+
"enable_perm": false,
|
5219 |
"group_num": 1,
|
5220 |
"group_size": 5120,
|
5221 |
"in_features": 5120,
|
|
|
5239 |
"model.layers.37.mlp.down_proj": {
|
5240 |
"bias": null,
|
5241 |
"enable_norm": true,
|
5242 |
+
"enable_perm": false,
|
5243 |
"group_num": 1,
|
5244 |
"group_size": 13824,
|
5245 |
"in_features": 13824,
|
|
|
5263 |
"model.layers.37.mlp.gate_proj": {
|
5264 |
"bias": null,
|
5265 |
"enable_norm": true,
|
5266 |
+
"enable_perm": false,
|
5267 |
"group_num": 1,
|
5268 |
"group_size": 5120,
|
5269 |
"in_features": 5120,
|
|
|
5287 |
"model.layers.37.mlp.up_proj": {
|
5288 |
"bias": null,
|
5289 |
"enable_norm": true,
|
5290 |
+
"enable_perm": false,
|
5291 |
"group_num": 1,
|
5292 |
"group_size": 5120,
|
5293 |
"in_features": 5120,
|
|
|
5311 |
"model.layers.37.self_attn.k_proj": {
|
5312 |
"bias": true,
|
5313 |
"enable_norm": true,
|
5314 |
+
"enable_perm": false,
|
5315 |
"group_num": 1,
|
5316 |
"group_size": 5120,
|
5317 |
"in_features": 5120,
|
|
|
5335 |
"model.layers.37.self_attn.o_proj": {
|
5336 |
"bias": null,
|
5337 |
"enable_norm": true,
|
5338 |
+
"enable_perm": false,
|
5339 |
"group_num": 1,
|
5340 |
"group_size": 5120,
|
5341 |
"in_features": 5120,
|
|
|
5359 |
"model.layers.37.self_attn.q_proj": {
|
5360 |
"bias": true,
|
5361 |
"enable_norm": true,
|
5362 |
+
"enable_perm": false,
|
5363 |
"group_num": 1,
|
5364 |
"group_size": 5120,
|
5365 |
"in_features": 5120,
|
|
|
5383 |
"model.layers.37.self_attn.v_proj": {
|
5384 |
"bias": true,
|
5385 |
"enable_norm": true,
|
5386 |
+
"enable_perm": false,
|
5387 |
"group_num": 1,
|
5388 |
"group_size": 5120,
|
5389 |
"in_features": 5120,
|
|
|
5407 |
"model.layers.38.mlp.down_proj": {
|
5408 |
"bias": null,
|
5409 |
"enable_norm": true,
|
5410 |
+
"enable_perm": false,
|
5411 |
"group_num": 1,
|
5412 |
"group_size": 13824,
|
5413 |
"in_features": 13824,
|
|
|
5431 |
"model.layers.38.mlp.gate_proj": {
|
5432 |
"bias": null,
|
5433 |
"enable_norm": true,
|
5434 |
+
"enable_perm": false,
|
5435 |
"group_num": 1,
|
5436 |
"group_size": 5120,
|
5437 |
"in_features": 5120,
|
|
|
5455 |
"model.layers.38.mlp.up_proj": {
|
5456 |
"bias": null,
|
5457 |
"enable_norm": true,
|
5458 |
+
"enable_perm": false,
|
5459 |
"group_num": 1,
|
5460 |
"group_size": 5120,
|
5461 |
"in_features": 5120,
|
|
|
5479 |
"model.layers.38.self_attn.k_proj": {
|
5480 |
"bias": true,
|
5481 |
"enable_norm": true,
|
5482 |
+
"enable_perm": false,
|
5483 |
"group_num": 1,
|
5484 |
"group_size": 5120,
|
5485 |
"in_features": 5120,
|
|
|
5503 |
"model.layers.38.self_attn.o_proj": {
|
5504 |
"bias": null,
|
5505 |
"enable_norm": true,
|
5506 |
+
"enable_perm": false,
|
5507 |
"group_num": 1,
|
5508 |
"group_size": 5120,
|
5509 |
"in_features": 5120,
|
|
|
5527 |
"model.layers.38.self_attn.q_proj": {
|
5528 |
"bias": true,
|
5529 |
"enable_norm": true,
|
5530 |
+
"enable_perm": false,
|
5531 |
"group_num": 1,
|
5532 |
"group_size": 5120,
|
5533 |
"in_features": 5120,
|
|
|
5551 |
"model.layers.38.self_attn.v_proj": {
|
5552 |
"bias": true,
|
5553 |
"enable_norm": true,
|
5554 |
+
"enable_perm": false,
|
5555 |
"group_num": 1,
|
5556 |
"group_size": 5120,
|
5557 |
"in_features": 5120,
|
|
|
5575 |
"model.layers.39.mlp.down_proj": {
|
5576 |
"bias": null,
|
5577 |
"enable_norm": true,
|
5578 |
+
"enable_perm": false,
|
5579 |
"group_num": 1,
|
5580 |
"group_size": 13824,
|
5581 |
"in_features": 13824,
|
|
|
5599 |
"model.layers.39.mlp.gate_proj": {
|
5600 |
"bias": null,
|
5601 |
"enable_norm": true,
|
5602 |
+
"enable_perm": false,
|
5603 |
"group_num": 1,
|
5604 |
"group_size": 5120,
|
5605 |
"in_features": 5120,
|
|
|
5623 |
"model.layers.39.mlp.up_proj": {
|
5624 |
"bias": null,
|
5625 |
"enable_norm": true,
|
5626 |
+
"enable_perm": false,
|
5627 |
"group_num": 1,
|
5628 |
"group_size": 5120,
|
5629 |
"in_features": 5120,
|
|
|
5647 |
"model.layers.39.self_attn.k_proj": {
|
5648 |
"bias": true,
|
5649 |
"enable_norm": true,
|
5650 |
+
"enable_perm": false,
|
5651 |
"group_num": 1,
|
5652 |
"group_size": 5120,
|
5653 |
"in_features": 5120,
|
|
|
5671 |
"model.layers.39.self_attn.o_proj": {
|
5672 |
"bias": null,
|
5673 |
"enable_norm": true,
|
5674 |
+
"enable_perm": false,
|
5675 |
"group_num": 1,
|
5676 |
"group_size": 5120,
|
5677 |
"in_features": 5120,
|
|
|
5695 |
"model.layers.39.self_attn.q_proj": {
|
5696 |
"bias": true,
|
5697 |
"enable_norm": true,
|
5698 |
+
"enable_perm": false,
|
5699 |
"group_num": 1,
|
5700 |
"group_size": 5120,
|
5701 |
"in_features": 5120,
|
|
|
5719 |
"model.layers.39.self_attn.v_proj": {
|
5720 |
"bias": true,
|
5721 |
"enable_norm": true,
|
5722 |
+
"enable_perm": false,
|
5723 |
"group_num": 1,
|
5724 |
"group_size": 5120,
|
5725 |
"in_features": 5120,
|
|
|
5743 |
"model.layers.4.mlp.down_proj": {
|
5744 |
"bias": null,
|
5745 |
"enable_norm": true,
|
5746 |
+
"enable_perm": false,
|
5747 |
"group_num": 1,
|
5748 |
"group_size": 13824,
|
5749 |
"in_features": 13824,
|
|
|
5767 |
"model.layers.4.mlp.gate_proj": {
|
5768 |
"bias": null,
|
5769 |
"enable_norm": true,
|
5770 |
+
"enable_perm": false,
|
5771 |
"group_num": 1,
|
5772 |
"group_size": 5120,
|
5773 |
"in_features": 5120,
|
|
|
5791 |
"model.layers.4.mlp.up_proj": {
|
5792 |
"bias": null,
|
5793 |
"enable_norm": true,
|
5794 |
+
"enable_perm": false,
|
5795 |
"group_num": 1,
|
5796 |
"group_size": 5120,
|
5797 |
"in_features": 5120,
|
|
|
5815 |
"model.layers.4.self_attn.k_proj": {
|
5816 |
"bias": true,
|
5817 |
"enable_norm": true,
|
5818 |
+
"enable_perm": false,
|
5819 |
"group_num": 1,
|
5820 |
"group_size": 5120,
|
5821 |
"in_features": 5120,
|
|
|
5839 |
"model.layers.4.self_attn.o_proj": {
|
5840 |
"bias": null,
|
5841 |
"enable_norm": true,
|
5842 |
+
"enable_perm": false,
|
5843 |
"group_num": 1,
|
5844 |
"group_size": 5120,
|
5845 |
"in_features": 5120,
|
|
|
5863 |
"model.layers.4.self_attn.q_proj": {
|
5864 |
"bias": true,
|
5865 |
"enable_norm": true,
|
5866 |
+
"enable_perm": false,
|
5867 |
"group_num": 1,
|
5868 |
"group_size": 5120,
|
5869 |
"in_features": 5120,
|
|
|
5887 |
"model.layers.4.self_attn.v_proj": {
|
5888 |
"bias": true,
|
5889 |
"enable_norm": true,
|
5890 |
+
"enable_perm": false,
|
5891 |
"group_num": 1,
|
5892 |
"group_size": 5120,
|
5893 |
"in_features": 5120,
|
|
|
5911 |
"model.layers.40.mlp.down_proj": {
|
5912 |
"bias": null,
|
5913 |
"enable_norm": true,
|
5914 |
+
"enable_perm": false,
|
5915 |
"group_num": 1,
|
5916 |
"group_size": 13824,
|
5917 |
"in_features": 13824,
|
|
|
5935 |
"model.layers.40.mlp.gate_proj": {
|
5936 |
"bias": null,
|
5937 |
"enable_norm": true,
|
5938 |
+
"enable_perm": false,
|
5939 |
"group_num": 1,
|
5940 |
"group_size": 5120,
|
5941 |
"in_features": 5120,
|
|
|
5959 |
"model.layers.40.mlp.up_proj": {
|
5960 |
"bias": null,
|
5961 |
"enable_norm": true,
|
5962 |
+
"enable_perm": false,
|
5963 |
"group_num": 1,
|
5964 |
"group_size": 5120,
|
5965 |
"in_features": 5120,
|
|
|
5983 |
"model.layers.40.self_attn.k_proj": {
|
5984 |
"bias": true,
|
5985 |
"enable_norm": true,
|
5986 |
+
"enable_perm": false,
|
5987 |
"group_num": 1,
|
5988 |
"group_size": 5120,
|
5989 |
"in_features": 5120,
|
|
|
6007 |
"model.layers.40.self_attn.o_proj": {
|
6008 |
"bias": null,
|
6009 |
"enable_norm": true,
|
6010 |
+
"enable_perm": false,
|
6011 |
"group_num": 1,
|
6012 |
"group_size": 5120,
|
6013 |
"in_features": 5120,
|
|
|
6031 |
"model.layers.40.self_attn.q_proj": {
|
6032 |
"bias": true,
|
6033 |
"enable_norm": true,
|
6034 |
+
"enable_perm": false,
|
6035 |
"group_num": 1,
|
6036 |
"group_size": 5120,
|
6037 |
"in_features": 5120,
|
|
|
6055 |
"model.layers.40.self_attn.v_proj": {
|
6056 |
"bias": true,
|
6057 |
"enable_norm": true,
|
6058 |
+
"enable_perm": false,
|
6059 |
"group_num": 1,
|
6060 |
"group_size": 5120,
|
6061 |
"in_features": 5120,
|
|
|
6079 |
"model.layers.41.mlp.down_proj": {
|
6080 |
"bias": null,
|
6081 |
"enable_norm": true,
|
6082 |
+
"enable_perm": false,
|
6083 |
"group_num": 1,
|
6084 |
"group_size": 13824,
|
6085 |
"in_features": 13824,
|
|
|
6103 |
"model.layers.41.mlp.gate_proj": {
|
6104 |
"bias": null,
|
6105 |
"enable_norm": true,
|
6106 |
+
"enable_perm": false,
|
6107 |
"group_num": 1,
|
6108 |
"group_size": 5120,
|
6109 |
"in_features": 5120,
|
|
|
6127 |
"model.layers.41.mlp.up_proj": {
|
6128 |
"bias": null,
|
6129 |
"enable_norm": true,
|
6130 |
+
"enable_perm": false,
|
6131 |
"group_num": 1,
|
6132 |
"group_size": 5120,
|
6133 |
"in_features": 5120,
|
|
|
6151 |
"model.layers.41.self_attn.k_proj": {
|
6152 |
"bias": true,
|
6153 |
"enable_norm": true,
|
6154 |
+
"enable_perm": false,
|
6155 |
"group_num": 1,
|
6156 |
"group_size": 5120,
|
6157 |
"in_features": 5120,
|
|
|
6175 |
"model.layers.41.self_attn.o_proj": {
|
6176 |
"bias": null,
|
6177 |
"enable_norm": true,
|
6178 |
+
"enable_perm": false,
|
6179 |
"group_num": 1,
|
6180 |
"group_size": 5120,
|
6181 |
"in_features": 5120,
|
|
|
6199 |
"model.layers.41.self_attn.q_proj": {
|
6200 |
"bias": true,
|
6201 |
"enable_norm": true,
|
6202 |
+
"enable_perm": false,
|
6203 |
"group_num": 1,
|
6204 |
"group_size": 5120,
|
6205 |
"in_features": 5120,
|
|
|
6223 |
"model.layers.41.self_attn.v_proj": {
|
6224 |
"bias": true,
|
6225 |
"enable_norm": true,
|
6226 |
+
"enable_perm": false,
|
6227 |
"group_num": 1,
|
6228 |
"group_size": 5120,
|
6229 |
"in_features": 5120,
|
|
|
6247 |
"model.layers.42.mlp.down_proj": {
|
6248 |
"bias": null,
|
6249 |
"enable_norm": true,
|
6250 |
+
"enable_perm": false,
|
6251 |
"group_num": 1,
|
6252 |
"group_size": 13824,
|
6253 |
"in_features": 13824,
|
|
|
6271 |
"model.layers.42.mlp.gate_proj": {
|
6272 |
"bias": null,
|
6273 |
"enable_norm": true,
|
6274 |
+
"enable_perm": false,
|
6275 |
"group_num": 1,
|
6276 |
"group_size": 5120,
|
6277 |
"in_features": 5120,
|
|
|
6295 |
"model.layers.42.mlp.up_proj": {
|
6296 |
"bias": null,
|
6297 |
"enable_norm": true,
|
6298 |
+
"enable_perm": false,
|
6299 |
"group_num": 1,
|
6300 |
"group_size": 5120,
|
6301 |
"in_features": 5120,
|
|
|
6319 |
"model.layers.42.self_attn.k_proj": {
|
6320 |
"bias": true,
|
6321 |
"enable_norm": true,
|
6322 |
+
"enable_perm": false,
|
6323 |
"group_num": 1,
|
6324 |
"group_size": 5120,
|
6325 |
"in_features": 5120,
|
|
|
6343 |
"model.layers.42.self_attn.o_proj": {
|
6344 |
"bias": null,
|
6345 |
"enable_norm": true,
|
6346 |
+
"enable_perm": false,
|
6347 |
"group_num": 1,
|
6348 |
"group_size": 5120,
|
6349 |
"in_features": 5120,
|
|
|
6367 |
"model.layers.42.self_attn.q_proj": {
|
6368 |
"bias": true,
|
6369 |
"enable_norm": true,
|
6370 |
+
"enable_perm": false,
|
6371 |
"group_num": 1,
|
6372 |
"group_size": 5120,
|
6373 |
"in_features": 5120,
|
|
|
6391 |
"model.layers.42.self_attn.v_proj": {
|
6392 |
"bias": true,
|
6393 |
"enable_norm": true,
|
6394 |
+
"enable_perm": false,
|
6395 |
"group_num": 1,
|
6396 |
"group_size": 5120,
|
6397 |
"in_features": 5120,
|
|
|
6415 |
"model.layers.43.mlp.down_proj": {
|
6416 |
"bias": null,
|
6417 |
"enable_norm": true,
|
6418 |
+
"enable_perm": false,
|
6419 |
"group_num": 1,
|
6420 |
"group_size": 13824,
|
6421 |
"in_features": 13824,
|
|
|
6439 |
"model.layers.43.mlp.gate_proj": {
|
6440 |
"bias": null,
|
6441 |
"enable_norm": true,
|
6442 |
+
"enable_perm": false,
|
6443 |
"group_num": 1,
|
6444 |
"group_size": 5120,
|
6445 |
"in_features": 5120,
|
|
|
6463 |
"model.layers.43.mlp.up_proj": {
|
6464 |
"bias": null,
|
6465 |
"enable_norm": true,
|
6466 |
+
"enable_perm": false,
|
6467 |
"group_num": 1,
|
6468 |
"group_size": 5120,
|
6469 |
"in_features": 5120,
|
|
|
6487 |
"model.layers.43.self_attn.k_proj": {
|
6488 |
"bias": true,
|
6489 |
"enable_norm": true,
|
6490 |
+
"enable_perm": false,
|
6491 |
"group_num": 1,
|
6492 |
"group_size": 5120,
|
6493 |
"in_features": 5120,
|
|
|
6511 |
"model.layers.43.self_attn.o_proj": {
|
6512 |
"bias": null,
|
6513 |
"enable_norm": true,
|
6514 |
+
"enable_perm": false,
|
6515 |
"group_num": 1,
|
6516 |
"group_size": 5120,
|
6517 |
"in_features": 5120,
|
|
|
6535 |
"model.layers.43.self_attn.q_proj": {
|
6536 |
"bias": true,
|
6537 |
"enable_norm": true,
|
6538 |
+
"enable_perm": false,
|
6539 |
"group_num": 1,
|
6540 |
"group_size": 5120,
|
6541 |
"in_features": 5120,
|
|
|
6559 |
"model.layers.43.self_attn.v_proj": {
|
6560 |
"bias": true,
|
6561 |
"enable_norm": true,
|
6562 |
+
"enable_perm": false,
|
6563 |
"group_num": 1,
|
6564 |
"group_size": 5120,
|
6565 |
"in_features": 5120,
|
|
|
6583 |
"model.layers.44.mlp.down_proj": {
|
6584 |
"bias": null,
|
6585 |
"enable_norm": true,
|
6586 |
+
"enable_perm": false,
|
6587 |
"group_num": 1,
|
6588 |
"group_size": 13824,
|
6589 |
"in_features": 13824,
|
|
|
6607 |
"model.layers.44.mlp.gate_proj": {
|
6608 |
"bias": null,
|
6609 |
"enable_norm": true,
|
6610 |
+
"enable_perm": false,
|
6611 |
"group_num": 1,
|
6612 |
"group_size": 5120,
|
6613 |
"in_features": 5120,
|
|
|
6631 |
"model.layers.44.mlp.up_proj": {
|
6632 |
"bias": null,
|
6633 |
"enable_norm": true,
|
6634 |
+
"enable_perm": false,
|
6635 |
"group_num": 1,
|
6636 |
"group_size": 5120,
|
6637 |
"in_features": 5120,
|
|
|
6655 |
"model.layers.44.self_attn.k_proj": {
|
6656 |
"bias": true,
|
6657 |
"enable_norm": true,
|
6658 |
+
"enable_perm": false,
|
6659 |
"group_num": 1,
|
6660 |
"group_size": 5120,
|
6661 |
"in_features": 5120,
|
|
|
6679 |
"model.layers.44.self_attn.o_proj": {
|
6680 |
"bias": null,
|
6681 |
"enable_norm": true,
|
6682 |
+
"enable_perm": false,
|
6683 |
"group_num": 1,
|
6684 |
"group_size": 5120,
|
6685 |
"in_features": 5120,
|
|
|
6703 |
"model.layers.44.self_attn.q_proj": {
|
6704 |
"bias": true,
|
6705 |
"enable_norm": true,
|
6706 |
+
"enable_perm": false,
|
6707 |
"group_num": 1,
|
6708 |
"group_size": 5120,
|
6709 |
"in_features": 5120,
|
|
|
6727 |
"model.layers.44.self_attn.v_proj": {
|
6728 |
"bias": true,
|
6729 |
"enable_norm": true,
|
6730 |
+
"enable_perm": false,
|
6731 |
"group_num": 1,
|
6732 |
"group_size": 5120,
|
6733 |
"in_features": 5120,
|
|
|
6751 |
"model.layers.45.mlp.down_proj": {
|
6752 |
"bias": null,
|
6753 |
"enable_norm": true,
|
6754 |
+
"enable_perm": false,
|
6755 |
"group_num": 1,
|
6756 |
"group_size": 13824,
|
6757 |
"in_features": 13824,
|
|
|
6775 |
"model.layers.45.mlp.gate_proj": {
|
6776 |
"bias": null,
|
6777 |
"enable_norm": true,
|
6778 |
+
"enable_perm": false,
|
6779 |
"group_num": 1,
|
6780 |
"group_size": 5120,
|
6781 |
"in_features": 5120,
|
|
|
6799 |
"model.layers.45.mlp.up_proj": {
|
6800 |
"bias": null,
|
6801 |
"enable_norm": true,
|
6802 |
+
"enable_perm": false,
|
6803 |
"group_num": 1,
|
6804 |
"group_size": 5120,
|
6805 |
"in_features": 5120,
|
|
|
6823 |
"model.layers.45.self_attn.k_proj": {
|
6824 |
"bias": true,
|
6825 |
"enable_norm": true,
|
6826 |
+
"enable_perm": false,
|
6827 |
"group_num": 1,
|
6828 |
"group_size": 5120,
|
6829 |
"in_features": 5120,
|
|
|
6847 |
"model.layers.45.self_attn.o_proj": {
|
6848 |
"bias": null,
|
6849 |
"enable_norm": true,
|
6850 |
+
"enable_perm": false,
|
6851 |
"group_num": 1,
|
6852 |
"group_size": 5120,
|
6853 |
"in_features": 5120,
|
|
|
6871 |
"model.layers.45.self_attn.q_proj": {
|
6872 |
"bias": true,
|
6873 |
"enable_norm": true,
|
6874 |
+
"enable_perm": false,
|
6875 |
"group_num": 1,
|
6876 |
"group_size": 5120,
|
6877 |
"in_features": 5120,
|
|
|
6895 |
"model.layers.45.self_attn.v_proj": {
|
6896 |
"bias": true,
|
6897 |
"enable_norm": true,
|
6898 |
+
"enable_perm": false,
|
6899 |
"group_num": 1,
|
6900 |
"group_size": 5120,
|
6901 |
"in_features": 5120,
|
|
|
6919 |
"model.layers.46.mlp.down_proj": {
|
6920 |
"bias": null,
|
6921 |
"enable_norm": true,
|
6922 |
+
"enable_perm": false,
|
6923 |
"group_num": 1,
|
6924 |
"group_size": 13824,
|
6925 |
"in_features": 13824,
|
|
|
6943 |
"model.layers.46.mlp.gate_proj": {
|
6944 |
"bias": null,
|
6945 |
"enable_norm": true,
|
6946 |
+
"enable_perm": false,
|
6947 |
"group_num": 1,
|
6948 |
"group_size": 5120,
|
6949 |
"in_features": 5120,
|
|
|
6967 |
"model.layers.46.mlp.up_proj": {
|
6968 |
"bias": null,
|
6969 |
"enable_norm": true,
|
6970 |
+
"enable_perm": false,
|
6971 |
"group_num": 1,
|
6972 |
"group_size": 5120,
|
6973 |
"in_features": 5120,
|
|
|
6991 |
"model.layers.46.self_attn.k_proj": {
|
6992 |
"bias": true,
|
6993 |
"enable_norm": true,
|
6994 |
+
"enable_perm": false,
|
6995 |
"group_num": 1,
|
6996 |
"group_size": 5120,
|
6997 |
"in_features": 5120,
|
|
|
7015 |
"model.layers.46.self_attn.o_proj": {
|
7016 |
"bias": null,
|
7017 |
"enable_norm": true,
|
7018 |
+
"enable_perm": false,
|
7019 |
"group_num": 1,
|
7020 |
"group_size": 5120,
|
7021 |
"in_features": 5120,
|
|
|
7039 |
"model.layers.46.self_attn.q_proj": {
|
7040 |
"bias": true,
|
7041 |
"enable_norm": true,
|
7042 |
+
"enable_perm": false,
|
7043 |
"group_num": 1,
|
7044 |
"group_size": 5120,
|
7045 |
"in_features": 5120,
|
|
|
7063 |
"model.layers.46.self_attn.v_proj": {
|
7064 |
"bias": true,
|
7065 |
"enable_norm": true,
|
7066 |
+
"enable_perm": false,
|
7067 |
"group_num": 1,
|
7068 |
"group_size": 5120,
|
7069 |
"in_features": 5120,
|
|
|
7087 |
"model.layers.47.mlp.down_proj": {
|
7088 |
"bias": null,
|
7089 |
"enable_norm": true,
|
7090 |
+
"enable_perm": false,
|
7091 |
"group_num": 1,
|
7092 |
"group_size": 13824,
|
7093 |
"in_features": 13824,
|
|
|
7111 |
"model.layers.47.mlp.gate_proj": {
|
7112 |
"bias": null,
|
7113 |
"enable_norm": true,
|
7114 |
+
"enable_perm": false,
|
7115 |
"group_num": 1,
|
7116 |
"group_size": 5120,
|
7117 |
"in_features": 5120,
|
|
|
7135 |
"model.layers.47.mlp.up_proj": {
|
7136 |
"bias": null,
|
7137 |
"enable_norm": true,
|
7138 |
+
"enable_perm": false,
|
7139 |
"group_num": 1,
|
7140 |
"group_size": 5120,
|
7141 |
"in_features": 5120,
|
|
|
7159 |
"model.layers.47.self_attn.k_proj": {
|
7160 |
"bias": true,
|
7161 |
"enable_norm": true,
|
7162 |
+
"enable_perm": false,
|
7163 |
"group_num": 1,
|
7164 |
"group_size": 5120,
|
7165 |
"in_features": 5120,
|
|
|
7183 |
"model.layers.47.self_attn.o_proj": {
|
7184 |
"bias": null,
|
7185 |
"enable_norm": true,
|
7186 |
+
"enable_perm": false,
|
7187 |
"group_num": 1,
|
7188 |
"group_size": 5120,
|
7189 |
"in_features": 5120,
|
|
|
7207 |
"model.layers.47.self_attn.q_proj": {
|
7208 |
"bias": true,
|
7209 |
"enable_norm": true,
|
7210 |
+
"enable_perm": false,
|
7211 |
"group_num": 1,
|
7212 |
"group_size": 5120,
|
7213 |
"in_features": 5120,
|
|
|
7231 |
"model.layers.47.self_attn.v_proj": {
|
7232 |
"bias": true,
|
7233 |
"enable_norm": true,
|
7234 |
+
"enable_perm": false,
|
7235 |
"group_num": 1,
|
7236 |
"group_size": 5120,
|
7237 |
"in_features": 5120,
|
|
|
7255 |
"model.layers.5.mlp.down_proj": {
|
7256 |
"bias": null,
|
7257 |
"enable_norm": true,
|
7258 |
+
"enable_perm": false,
|
7259 |
"group_num": 1,
|
7260 |
"group_size": 13824,
|
7261 |
"in_features": 13824,
|
|
|
7279 |
"model.layers.5.mlp.gate_proj": {
|
7280 |
"bias": null,
|
7281 |
"enable_norm": true,
|
7282 |
+
"enable_perm": false,
|
7283 |
"group_num": 1,
|
7284 |
"group_size": 5120,
|
7285 |
"in_features": 5120,
|
|
|
7303 |
"model.layers.5.mlp.up_proj": {
|
7304 |
"bias": null,
|
7305 |
"enable_norm": true,
|
7306 |
+
"enable_perm": false,
|
7307 |
"group_num": 1,
|
7308 |
"group_size": 5120,
|
7309 |
"in_features": 5120,
|
|
|
7327 |
"model.layers.5.self_attn.k_proj": {
|
7328 |
"bias": true,
|
7329 |
"enable_norm": true,
|
7330 |
+
"enable_perm": false,
|
7331 |
"group_num": 1,
|
7332 |
"group_size": 5120,
|
7333 |
"in_features": 5120,
|
|
|
7351 |
"model.layers.5.self_attn.o_proj": {
|
7352 |
"bias": null,
|
7353 |
"enable_norm": true,
|
7354 |
+
"enable_perm": false,
|
7355 |
"group_num": 1,
|
7356 |
"group_size": 5120,
|
7357 |
"in_features": 5120,
|
|
|
7375 |
"model.layers.5.self_attn.q_proj": {
|
7376 |
"bias": true,
|
7377 |
"enable_norm": true,
|
7378 |
+
"enable_perm": false,
|
7379 |
"group_num": 1,
|
7380 |
"group_size": 5120,
|
7381 |
"in_features": 5120,
|
|
|
7399 |
"model.layers.5.self_attn.v_proj": {
|
7400 |
"bias": true,
|
7401 |
"enable_norm": true,
|
7402 |
+
"enable_perm": false,
|
7403 |
"group_num": 1,
|
7404 |
"group_size": 5120,
|
7405 |
"in_features": 5120,
|
|
|
7423 |
"model.layers.6.mlp.down_proj": {
|
7424 |
"bias": null,
|
7425 |
"enable_norm": true,
|
7426 |
+
"enable_perm": false,
|
7427 |
"group_num": 1,
|
7428 |
"group_size": 13824,
|
7429 |
"in_features": 13824,
|
|
|
7447 |
"model.layers.6.mlp.gate_proj": {
|
7448 |
"bias": null,
|
7449 |
"enable_norm": true,
|
7450 |
+
"enable_perm": false,
|
7451 |
"group_num": 1,
|
7452 |
"group_size": 5120,
|
7453 |
"in_features": 5120,
|
|
|
7471 |
"model.layers.6.mlp.up_proj": {
|
7472 |
"bias": null,
|
7473 |
"enable_norm": true,
|
7474 |
+
"enable_perm": false,
|
7475 |
"group_num": 1,
|
7476 |
"group_size": 5120,
|
7477 |
"in_features": 5120,
|
|
|
7495 |
"model.layers.6.self_attn.k_proj": {
|
7496 |
"bias": true,
|
7497 |
"enable_norm": true,
|
7498 |
+
"enable_perm": false,
|
7499 |
"group_num": 1,
|
7500 |
"group_size": 5120,
|
7501 |
"in_features": 5120,
|
|
|
7519 |
"model.layers.6.self_attn.o_proj": {
|
7520 |
"bias": null,
|
7521 |
"enable_norm": true,
|
7522 |
+
"enable_perm": false,
|
7523 |
"group_num": 1,
|
7524 |
"group_size": 5120,
|
7525 |
"in_features": 5120,
|
|
|
7543 |
"model.layers.6.self_attn.q_proj": {
|
7544 |
"bias": true,
|
7545 |
"enable_norm": true,
|
7546 |
+
"enable_perm": false,
|
7547 |
"group_num": 1,
|
7548 |
"group_size": 5120,
|
7549 |
"in_features": 5120,
|
|
|
7567 |
"model.layers.6.self_attn.v_proj": {
|
7568 |
"bias": true,
|
7569 |
"enable_norm": true,
|
7570 |
+
"enable_perm": false,
|
7571 |
"group_num": 1,
|
7572 |
"group_size": 5120,
|
7573 |
"in_features": 5120,
|
|
|
7591 |
"model.layers.7.mlp.down_proj": {
|
7592 |
"bias": null,
|
7593 |
"enable_norm": true,
|
7594 |
+
"enable_perm": false,
|
7595 |
"group_num": 1,
|
7596 |
"group_size": 13824,
|
7597 |
"in_features": 13824,
|
|
|
7615 |
"model.layers.7.mlp.gate_proj": {
|
7616 |
"bias": null,
|
7617 |
"enable_norm": true,
|
7618 |
+
"enable_perm": false,
|
7619 |
"group_num": 1,
|
7620 |
"group_size": 5120,
|
7621 |
"in_features": 5120,
|
|
|
7639 |
"model.layers.7.mlp.up_proj": {
|
7640 |
"bias": null,
|
7641 |
"enable_norm": true,
|
7642 |
+
"enable_perm": false,
|
7643 |
"group_num": 1,
|
7644 |
"group_size": 5120,
|
7645 |
"in_features": 5120,
|
|
|
7663 |
"model.layers.7.self_attn.k_proj": {
|
7664 |
"bias": true,
|
7665 |
"enable_norm": true,
|
7666 |
+
"enable_perm": false,
|
7667 |
"group_num": 1,
|
7668 |
"group_size": 5120,
|
7669 |
"in_features": 5120,
|
|
|
7687 |
"model.layers.7.self_attn.o_proj": {
|
7688 |
"bias": null,
|
7689 |
"enable_norm": true,
|
7690 |
+
"enable_perm": false,
|
7691 |
"group_num": 1,
|
7692 |
"group_size": 5120,
|
7693 |
"in_features": 5120,
|
|
|
7711 |
"model.layers.7.self_attn.q_proj": {
|
7712 |
"bias": true,
|
7713 |
"enable_norm": true,
|
7714 |
+
"enable_perm": false,
|
7715 |
"group_num": 1,
|
7716 |
"group_size": 5120,
|
7717 |
"in_features": 5120,
|
|
|
7735 |
"model.layers.7.self_attn.v_proj": {
|
7736 |
"bias": true,
|
7737 |
"enable_norm": true,
|
7738 |
+
"enable_perm": false,
|
7739 |
"group_num": 1,
|
7740 |
"group_size": 5120,
|
7741 |
"in_features": 5120,
|
|
|
7759 |
"model.layers.8.mlp.down_proj": {
|
7760 |
"bias": null,
|
7761 |
"enable_norm": true,
|
7762 |
+
"enable_perm": false,
|
7763 |
"group_num": 1,
|
7764 |
"group_size": 13824,
|
7765 |
"in_features": 13824,
|
|
|
7783 |
"model.layers.8.mlp.gate_proj": {
|
7784 |
"bias": null,
|
7785 |
"enable_norm": true,
|
7786 |
+
"enable_perm": false,
|
7787 |
"group_num": 1,
|
7788 |
"group_size": 5120,
|
7789 |
"in_features": 5120,
|
|
|
7807 |
"model.layers.8.mlp.up_proj": {
|
7808 |
"bias": null,
|
7809 |
"enable_norm": true,
|
7810 |
+
"enable_perm": false,
|
7811 |
"group_num": 1,
|
7812 |
"group_size": 5120,
|
7813 |
"in_features": 5120,
|
|
|
7831 |
"model.layers.8.self_attn.k_proj": {
|
7832 |
"bias": true,
|
7833 |
"enable_norm": true,
|
7834 |
+
"enable_perm": false,
|
7835 |
"group_num": 1,
|
7836 |
"group_size": 5120,
|
7837 |
"in_features": 5120,
|
|
|
7855 |
"model.layers.8.self_attn.o_proj": {
|
7856 |
"bias": null,
|
7857 |
"enable_norm": true,
|
7858 |
+
"enable_perm": false,
|
7859 |
"group_num": 1,
|
7860 |
"group_size": 5120,
|
7861 |
"in_features": 5120,
|
|
|
7879 |
"model.layers.8.self_attn.q_proj": {
|
7880 |
"bias": true,
|
7881 |
"enable_norm": true,
|
7882 |
+
"enable_perm": false,
|
7883 |
"group_num": 1,
|
7884 |
"group_size": 5120,
|
7885 |
"in_features": 5120,
|
|
|
7903 |
"model.layers.8.self_attn.v_proj": {
|
7904 |
"bias": true,
|
7905 |
"enable_norm": true,
|
7906 |
+
"enable_perm": false,
|
7907 |
"group_num": 1,
|
7908 |
"group_size": 5120,
|
7909 |
"in_features": 5120,
|
|
|
7927 |
"model.layers.9.mlp.down_proj": {
|
7928 |
"bias": null,
|
7929 |
"enable_norm": true,
|
7930 |
+
"enable_perm": false,
|
7931 |
"group_num": 1,
|
7932 |
"group_size": 13824,
|
7933 |
"in_features": 13824,
|
|
|
7951 |
"model.layers.9.mlp.gate_proj": {
|
7952 |
"bias": null,
|
7953 |
"enable_norm": true,
|
7954 |
+
"enable_perm": false,
|
7955 |
"group_num": 1,
|
7956 |
"group_size": 5120,
|
7957 |
"in_features": 5120,
|
|
|
7975 |
"model.layers.9.mlp.up_proj": {
|
7976 |
"bias": null,
|
7977 |
"enable_norm": true,
|
7978 |
+
"enable_perm": false,
|
7979 |
"group_num": 1,
|
7980 |
"group_size": 5120,
|
7981 |
"in_features": 5120,
|
|
|
7999 |
"model.layers.9.self_attn.k_proj": {
|
8000 |
"bias": true,
|
8001 |
"enable_norm": true,
|
8002 |
+
"enable_perm": false,
|
8003 |
"group_num": 1,
|
8004 |
"group_size": 5120,
|
8005 |
"in_features": 5120,
|
|
|
8023 |
"model.layers.9.self_attn.o_proj": {
|
8024 |
"bias": null,
|
8025 |
"enable_norm": true,
|
8026 |
+
"enable_perm": false,
|
8027 |
"group_num": 1,
|
8028 |
"group_size": 5120,
|
8029 |
"in_features": 5120,
|
|
|
8047 |
"model.layers.9.self_attn.q_proj": {
|
8048 |
"bias": true,
|
8049 |
"enable_norm": true,
|
8050 |
+
"enable_perm": false,
|
8051 |
"group_num": 1,
|
8052 |
"group_size": 5120,
|
8053 |
"in_features": 5120,
|
|
|
8071 |
"model.layers.9.self_attn.v_proj": {
|
8072 |
"bias": true,
|
8073 |
"enable_norm": true,
|
8074 |
+
"enable_perm": false,
|
8075 |
"group_num": 1,
|
8076 |
"group_size": 5120,
|
8077 |
"in_features": 5120,
|
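Because the edit is purely mechanical (the same "enable_perm": false flip in every quantization entry), it can be applied with a short script rather than by hand. Below is a minimal sketch, assuming only that the entries live somewhere inside config.json as JSON objects carrying an "enable_perm" key; the file path, function name, and output indentation are illustrative, not part of this commit:

    import json

    def disable_perm(node):
        """Recursively set every 'enable_perm' flag to False in a parsed JSON tree."""
        if isinstance(node, dict):
            if "enable_perm" in node:
                node["enable_perm"] = False
            for value in node.values():
                disable_perm(value)
        elif isinstance(node, list):
            for item in node:
                disable_perm(item)

    with open("config.json") as f:   # illustrative path
        config = json.load(f)

    disable_perm(config)

    with open("config.json", "w") as f:
        json.dump(config, f, indent=2)

Walking the whole tree avoids hard-coding the parent key under which the per-module entries sit, since that key is not visible in this diff.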