Upload folder using huggingface_hub
Browse files- hidden_states.safetensors +1 -1
- job_new.json +921 -873
- measurement.json +13 -13
- output-00001-of-00002.safetensors +2 -2
- output-00002-of-00002.safetensors +2 -2
hidden_states.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2097160800
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:51fa1df539e6d53a8e76a55c37fbd0bb987b123d7af8b307f99b56fe68593797
|
3 |
size 2097160800
|
job_new.json
CHANGED
@@ -16952,7 +16952,7 @@
|
|
16952 |
}
|
16953 |
},
|
16954 |
{
|
16955 |
-
"accuracy": 0.
|
16956 |
"total_bits": 899923248,
|
16957 |
"gate_proj": {
|
16958 |
"group_size": {
|
@@ -17056,7 +17056,7 @@
|
|
17056 |
}
|
17057 |
},
|
17058 |
{
|
17059 |
-
"accuracy": 0.
|
17060 |
"total_bits": 995125808,
|
17061 |
"gate_proj": {
|
17062 |
"group_size": {
|
@@ -17105,7 +17105,7 @@
|
|
17105 |
}
|
17106 |
},
|
17107 |
{
|
17108 |
-
"accuracy": 0.
|
17109 |
"total_bits": 1137906608,
|
17110 |
"gate_proj": {
|
17111 |
"group_size": {
|
@@ -17899,7 +17899,7 @@
|
|
17899 |
}
|
17900 |
},
|
17901 |
{
|
17902 |
-
"accuracy": 0.
|
17903 |
"total_bits": 206079360,
|
17904 |
"q_proj": {
|
17905 |
"group_size": {
|
@@ -21545,7 +21545,7 @@
|
|
21545 |
}
|
21546 |
},
|
21547 |
{
|
21548 |
-
"accuracy": 0.
|
21549 |
"total_bits": 143375360,
|
21550 |
"q_proj": {
|
21551 |
"group_size": {
|
@@ -24554,7 +24554,7 @@
|
|
24554 |
],
|
24555 |
"model.layers.12.mlp": [
|
24556 |
{
|
24557 |
-
"accuracy": 0.
|
24558 |
"total_bits": 614790432,
|
24559 |
"gate_proj": {
|
24560 |
"group_size": {
|
@@ -24606,7 +24606,7 @@
|
|
24606 |
}
|
24607 |
},
|
24608 |
{
|
24609 |
-
"accuracy": 0.
|
24610 |
"total_bits": 637728032,
|
24611 |
"gate_proj": {
|
24612 |
"group_size": {
|
@@ -46269,7 +46269,7 @@
|
|
46269 |
}
|
46270 |
},
|
46271 |
{
|
46272 |
-
"accuracy": 0.
|
46273 |
"total_bits": 925328640,
|
46274 |
"gate_proj": {
|
46275 |
"group_size": {
|
@@ -51868,7 +51868,7 @@
|
|
51868 |
],
|
51869 |
"model.layers.26.mlp": [
|
51870 |
{
|
51871 |
-
"accuracy": 0.
|
51872 |
"total_bits": 614790432,
|
51873 |
"gate_proj": {
|
51874 |
"group_size": {
|
@@ -55136,7 +55136,7 @@
|
|
55136 |
}
|
55137 |
},
|
55138 |
{
|
55139 |
-
"accuracy": 0.
|
55140 |
"total_bits": 267324160,
|
55141 |
"q_proj": {
|
55142 |
"group_size": {
|
@@ -55188,7 +55188,7 @@
|
|
55188 |
}
|
55189 |
},
|
55190 |
{
|
55191 |
-
"accuracy": 0.
|
55192 |
"total_bits": 271022080,
|
55193 |
"q_proj": {
|
55194 |
"group_size": {
|
@@ -62992,7 +62992,7 @@
|
|
62992 |
}
|
62993 |
},
|
62994 |
{
|
62995 |
-
"accuracy": 0.
|
62996 |
"total_bits": 271022080,
|
62997 |
"q_proj": {
|
62998 |
"group_size": {
|
@@ -64403,7 +64403,7 @@
|
|
64403 |
],
|
64404 |
"model.layers.33.self_attn": [
|
64405 |
{
|
64406 |
-
"accuracy": 0.
|
64407 |
"total_bits": 139115520,
|
64408 |
"q_proj": {
|
64409 |
"group_size": {
|
@@ -78063,14 +78063,14 @@
|
|
78063 |
},
|
78064 |
"strategy": {
|
78065 |
"model.layers.0.self_attn": {
|
78066 |
-
"accuracy": 0.
|
78067 |
-
"total_bits":
|
78068 |
"q_proj": {
|
78069 |
"group_size": {
|
78070 |
-
"
|
78071 |
},
|
78072 |
"bits": [
|
78073 |
-
|
78074 |
],
|
78075 |
"bits_prop": [
|
78076 |
1
|
@@ -78079,10 +78079,10 @@
|
|
78079 |
},
|
78080 |
"k_proj": {
|
78081 |
"group_size": {
|
78082 |
-
"
|
78083 |
},
|
78084 |
"bits": [
|
78085 |
-
|
78086 |
],
|
78087 |
"bits_prop": [
|
78088 |
1
|
@@ -78091,7 +78091,7 @@
|
|
78091 |
},
|
78092 |
"v_proj": {
|
78093 |
"group_size": {
|
78094 |
-
"8":
|
78095 |
},
|
78096 |
"bits": [
|
78097 |
8
|
@@ -78103,10 +78103,10 @@
|
|
78103 |
},
|
78104 |
"o_proj": {
|
78105 |
"group_size": {
|
78106 |
-
"
|
78107 |
},
|
78108 |
"bits": [
|
78109 |
-
|
78110 |
],
|
78111 |
"bits_prop": [
|
78112 |
1
|
@@ -78167,14 +78167,14 @@
|
|
78167 |
}
|
78168 |
},
|
78169 |
"model.layers.1.self_attn": {
|
78170 |
-
"accuracy": 0.
|
78171 |
-
"total_bits":
|
78172 |
"q_proj": {
|
78173 |
"group_size": {
|
78174 |
-
"
|
78175 |
},
|
78176 |
"bits": [
|
78177 |
-
|
78178 |
],
|
78179 |
"bits_prop": [
|
78180 |
1
|
@@ -78183,10 +78183,10 @@
|
|
78183 |
},
|
78184 |
"k_proj": {
|
78185 |
"group_size": {
|
78186 |
-
"
|
78187 |
},
|
78188 |
"bits": [
|
78189 |
-
|
78190 |
],
|
78191 |
"bits_prop": [
|
78192 |
1
|
@@ -78195,7 +78195,7 @@
|
|
78195 |
},
|
78196 |
"v_proj": {
|
78197 |
"group_size": {
|
78198 |
-
"8":
|
78199 |
},
|
78200 |
"bits": [
|
78201 |
8
|
@@ -78207,10 +78207,10 @@
|
|
78207 |
},
|
78208 |
"o_proj": {
|
78209 |
"group_size": {
|
78210 |
-
"
|
78211 |
},
|
78212 |
"bits": [
|
78213 |
-
|
78214 |
],
|
78215 |
"bits_prop": [
|
78216 |
1
|
@@ -78271,38 +78271,44 @@
|
|
78271 |
}
|
78272 |
},
|
78273 |
"model.layers.2.self_attn": {
|
78274 |
-
"accuracy": 0.
|
78275 |
-
"total_bits":
|
78276 |
"q_proj": {
|
78277 |
"group_size": {
|
78278 |
-
"6": 32
|
|
|
78279 |
},
|
78280 |
"bits": [
|
78281 |
-
6
|
|
|
78282 |
],
|
78283 |
"bits_prop": [
|
78284 |
-
1
|
|
|
78285 |
],
|
78286 |
"scale_bits": 4
|
78287 |
},
|
78288 |
"k_proj": {
|
78289 |
"group_size": {
|
78290 |
-
"6": 32
|
|
|
78291 |
},
|
78292 |
"bits": [
|
78293 |
-
6
|
|
|
78294 |
],
|
78295 |
"bits_prop": [
|
78296 |
-
1
|
|
|
78297 |
],
|
78298 |
"scale_bits": 4
|
78299 |
},
|
78300 |
"v_proj": {
|
78301 |
"group_size": {
|
78302 |
-
"
|
78303 |
},
|
78304 |
"bits": [
|
78305 |
-
|
78306 |
],
|
78307 |
"bits_prop": [
|
78308 |
1
|
@@ -78311,20 +78317,23 @@
|
|
78311 |
},
|
78312 |
"o_proj": {
|
78313 |
"group_size": {
|
78314 |
-
"6": 32
|
|
|
78315 |
},
|
78316 |
"bits": [
|
78317 |
-
6
|
|
|
78318 |
],
|
78319 |
"bits_prop": [
|
78320 |
-
1
|
|
|
78321 |
],
|
78322 |
"scale_bits": 4
|
78323 |
}
|
78324 |
},
|
78325 |
"model.layers.2.mlp": {
|
78326 |
-
"accuracy": 0.
|
78327 |
-
"total_bits":
|
78328 |
"gate_proj": {
|
78329 |
"group_size": {
|
78330 |
"8": 128,
|
@@ -78357,27 +78366,24 @@
|
|
78357 |
},
|
78358 |
"down_proj": {
|
78359 |
"group_size": {
|
78360 |
-
"8": 128
|
78361 |
-
"6": 128
|
78362 |
},
|
78363 |
"bits": [
|
78364 |
-
8
|
78365 |
-
6
|
78366 |
],
|
78367 |
"bits_prop": [
|
78368 |
-
|
78369 |
-
0.85
|
78370 |
],
|
78371 |
"scale_bits": 4
|
78372 |
}
|
78373 |
},
|
78374 |
"model.layers.3.self_attn": {
|
78375 |
-
"accuracy": 0.
|
78376 |
-
"total_bits":
|
78377 |
"q_proj": {
|
78378 |
"group_size": {
|
78379 |
-
"6":
|
78380 |
-
"5":
|
78381 |
},
|
78382 |
"bits": [
|
78383 |
6,
|
@@ -78391,8 +78397,8 @@
|
|
78391 |
},
|
78392 |
"k_proj": {
|
78393 |
"group_size": {
|
78394 |
-
"6":
|
78395 |
-
"5":
|
78396 |
},
|
78397 |
"bits": [
|
78398 |
6,
|
@@ -78406,7 +78412,7 @@
|
|
78406 |
},
|
78407 |
"v_proj": {
|
78408 |
"group_size": {
|
78409 |
-
"6":
|
78410 |
},
|
78411 |
"bits": [
|
78412 |
6
|
@@ -78418,8 +78424,8 @@
|
|
78418 |
},
|
78419 |
"o_proj": {
|
78420 |
"group_size": {
|
78421 |
-
"6":
|
78422 |
-
"5":
|
78423 |
},
|
78424 |
"bits": [
|
78425 |
6,
|
@@ -78656,16 +78662,16 @@
|
|
78656 |
}
|
78657 |
},
|
78658 |
"model.layers.5.mlp": {
|
78659 |
-
"accuracy": 0.
|
78660 |
-
"total_bits":
|
78661 |
"gate_proj": {
|
78662 |
"group_size": {
|
78663 |
-
"
|
78664 |
-
"
|
78665 |
},
|
78666 |
"bits": [
|
78667 |
-
|
78668 |
-
|
78669 |
],
|
78670 |
"bits_prop": [
|
78671 |
0.1,
|
@@ -78675,71 +78681,74 @@
|
|
78675 |
},
|
78676 |
"up_proj": {
|
78677 |
"group_size": {
|
78678 |
-
"
|
78679 |
-
"
|
78680 |
},
|
78681 |
"bits": [
|
78682 |
-
|
78683 |
-
|
78684 |
],
|
78685 |
"bits_prop": [
|
78686 |
-
0.
|
78687 |
-
0.
|
78688 |
],
|
78689 |
"scale_bits": 4
|
78690 |
},
|
78691 |
"down_proj": {
|
78692 |
"group_size": {
|
78693 |
-
"8":
|
78694 |
-
"6": 128
|
78695 |
-
"5": 128
|
78696 |
},
|
78697 |
"bits": [
|
78698 |
8,
|
78699 |
-
6
|
78700 |
-
5
|
78701 |
],
|
78702 |
"bits_prop": [
|
78703 |
-
0.
|
78704 |
-
0.1,
|
78705 |
0.85
|
78706 |
],
|
78707 |
"scale_bits": 4
|
78708 |
}
|
78709 |
},
|
78710 |
"model.layers.6.self_attn": {
|
78711 |
-
"accuracy": 0.
|
78712 |
-
"total_bits":
|
78713 |
"q_proj": {
|
78714 |
"group_size": {
|
78715 |
-
"6": 32
|
|
|
78716 |
},
|
78717 |
"bits": [
|
78718 |
-
6
|
|
|
78719 |
],
|
78720 |
"bits_prop": [
|
78721 |
-
1
|
|
|
78722 |
],
|
78723 |
"scale_bits": 4
|
78724 |
},
|
78725 |
"k_proj": {
|
78726 |
"group_size": {
|
78727 |
-
"6": 32
|
|
|
78728 |
},
|
78729 |
"bits": [
|
78730 |
-
6
|
|
|
78731 |
],
|
78732 |
"bits_prop": [
|
78733 |
-
1
|
|
|
78734 |
],
|
78735 |
"scale_bits": 4
|
78736 |
},
|
78737 |
"v_proj": {
|
78738 |
"group_size": {
|
78739 |
-
"
|
78740 |
},
|
78741 |
"bits": [
|
78742 |
-
|
78743 |
],
|
78744 |
"bits_prop": [
|
78745 |
1
|
@@ -78748,13 +78757,16 @@
|
|
78748 |
},
|
78749 |
"o_proj": {
|
78750 |
"group_size": {
|
78751 |
-
"6": 32
|
|
|
78752 |
},
|
78753 |
"bits": [
|
78754 |
-
6
|
|
|
78755 |
],
|
78756 |
"bits_prop": [
|
78757 |
-
1
|
|
|
78758 |
],
|
78759 |
"scale_bits": 4
|
78760 |
}
|
@@ -78812,11 +78824,11 @@
|
|
78812 |
}
|
78813 |
},
|
78814 |
"model.layers.7.self_attn": {
|
78815 |
-
"accuracy": 0.
|
78816 |
-
"total_bits":
|
78817 |
"q_proj": {
|
78818 |
"group_size": {
|
78819 |
-
"6":
|
78820 |
},
|
78821 |
"bits": [
|
78822 |
6
|
@@ -78828,7 +78840,7 @@
|
|
78828 |
},
|
78829 |
"k_proj": {
|
78830 |
"group_size": {
|
78831 |
-
"6":
|
78832 |
},
|
78833 |
"bits": [
|
78834 |
6
|
@@ -78840,10 +78852,10 @@
|
|
78840 |
},
|
78841 |
"v_proj": {
|
78842 |
"group_size": {
|
78843 |
-
"
|
78844 |
},
|
78845 |
"bits": [
|
78846 |
-
|
78847 |
],
|
78848 |
"bits_prop": [
|
78849 |
1
|
@@ -78852,7 +78864,7 @@
|
|
78852 |
},
|
78853 |
"o_proj": {
|
78854 |
"group_size": {
|
78855 |
-
"6":
|
78856 |
},
|
78857 |
"bits": [
|
78858 |
6
|
@@ -78864,7 +78876,120 @@
|
|
78864 |
}
|
78865 |
},
|
78866 |
"model.layers.7.mlp": {
|
78867 |
-
"accuracy": 0.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78868 |
"total_bits": 1466656000,
|
78869 |
"gate_proj": {
|
78870 |
"group_size": {
|
@@ -78915,40 +79040,10 @@
|
|
78915 |
"scale_bits": 4
|
78916 |
}
|
78917 |
},
|
78918 |
-
"model.layers.
|
78919 |
-
"accuracy": 0.
|
78920 |
-
"total_bits":
|
78921 |
"q_proj": {
|
78922 |
-
"group_size": {
|
78923 |
-
"6": 32,
|
78924 |
-
"5": 32
|
78925 |
-
},
|
78926 |
-
"bits": [
|
78927 |
-
6,
|
78928 |
-
5
|
78929 |
-
],
|
78930 |
-
"bits_prop": [
|
78931 |
-
0.1,
|
78932 |
-
0.9
|
78933 |
-
],
|
78934 |
-
"scale_bits": 4
|
78935 |
-
},
|
78936 |
-
"k_proj": {
|
78937 |
-
"group_size": {
|
78938 |
-
"6": 32,
|
78939 |
-
"5": 32
|
78940 |
-
},
|
78941 |
-
"bits": [
|
78942 |
-
6,
|
78943 |
-
5
|
78944 |
-
],
|
78945 |
-
"bits_prop": [
|
78946 |
-
0.1,
|
78947 |
-
0.9
|
78948 |
-
],
|
78949 |
-
"scale_bits": 4
|
78950 |
-
},
|
78951 |
-
"v_proj": {
|
78952 |
"group_size": {
|
78953 |
"6": 32
|
78954 |
},
|
@@ -78960,95 +79055,12 @@
|
|
78960 |
],
|
78961 |
"scale_bits": 4
|
78962 |
},
|
78963 |
-
"o_proj": {
|
78964 |
-
"group_size": {
|
78965 |
-
"6": 32,
|
78966 |
-
"5": 32
|
78967 |
-
},
|
78968 |
-
"bits": [
|
78969 |
-
6,
|
78970 |
-
5
|
78971 |
-
],
|
78972 |
-
"bits_prop": [
|
78973 |
-
0.1,
|
78974 |
-
0.9
|
78975 |
-
],
|
78976 |
-
"scale_bits": 4
|
78977 |
-
}
|
78978 |
-
},
|
78979 |
-
"model.layers.8.mlp": {
|
78980 |
-
"accuracy": 0.9908282831311226,
|
78981 |
-
"total_bits": 1441250608,
|
78982 |
-
"gate_proj": {
|
78983 |
-
"group_size": {
|
78984 |
-
"6": 128,
|
78985 |
-
"5": 128
|
78986 |
-
},
|
78987 |
-
"bits": [
|
78988 |
-
6,
|
78989 |
-
5
|
78990 |
-
],
|
78991 |
-
"bits_prop": [
|
78992 |
-
0.1,
|
78993 |
-
0.9
|
78994 |
-
],
|
78995 |
-
"scale_bits": 4
|
78996 |
-
},
|
78997 |
-
"up_proj": {
|
78998 |
-
"group_size": {
|
78999 |
-
"6": 128,
|
79000 |
-
"5": 128
|
79001 |
-
},
|
79002 |
-
"bits": [
|
79003 |
-
6,
|
79004 |
-
5
|
79005 |
-
],
|
79006 |
-
"bits_prop": [
|
79007 |
-
0.25,
|
79008 |
-
0.75
|
79009 |
-
],
|
79010 |
-
"scale_bits": 4
|
79011 |
-
},
|
79012 |
-
"down_proj": {
|
79013 |
-
"group_size": {
|
79014 |
-
"8": 32,
|
79015 |
-
"6": 128,
|
79016 |
-
"5": 128
|
79017 |
-
},
|
79018 |
-
"bits": [
|
79019 |
-
8,
|
79020 |
-
6,
|
79021 |
-
5
|
79022 |
-
],
|
79023 |
-
"bits_prop": [
|
79024 |
-
0.05,
|
79025 |
-
0.1,
|
79026 |
-
0.85
|
79027 |
-
],
|
79028 |
-
"scale_bits": 4
|
79029 |
-
}
|
79030 |
-
},
|
79031 |
-
"model.layers.9.self_attn": {
|
79032 |
-
"accuracy": 0.9989123591221869,
|
79033 |
-
"total_bits": 526999040,
|
79034 |
-
"q_proj": {
|
79035 |
-
"group_size": {
|
79036 |
-
"8": 128
|
79037 |
-
},
|
79038 |
-
"bits": [
|
79039 |
-
8
|
79040 |
-
],
|
79041 |
-
"bits_prop": [
|
79042 |
-
1
|
79043 |
-
],
|
79044 |
-
"scale_bits": 4
|
79045 |
-
},
|
79046 |
"k_proj": {
|
79047 |
"group_size": {
|
79048 |
-
"
|
79049 |
},
|
79050 |
"bits": [
|
79051 |
-
|
79052 |
],
|
79053 |
"bits_prop": [
|
79054 |
1
|
@@ -79057,7 +79069,7 @@
|
|
79057 |
},
|
79058 |
"v_proj": {
|
79059 |
"group_size": {
|
79060 |
-
"8":
|
79061 |
},
|
79062 |
"bits": [
|
79063 |
8
|
@@ -79069,10 +79081,10 @@
|
|
79069 |
},
|
79070 |
"o_proj": {
|
79071 |
"group_size": {
|
79072 |
-
"
|
79073 |
},
|
79074 |
"bits": [
|
79075 |
-
|
79076 |
],
|
79077 |
"bits_prop": [
|
79078 |
1
|
@@ -79081,16 +79093,16 @@
|
|
79081 |
}
|
79082 |
},
|
79083 |
"model.layers.9.mlp": {
|
79084 |
-
"accuracy": 0.
|
79085 |
-
"total_bits":
|
79086 |
"gate_proj": {
|
79087 |
"group_size": {
|
79088 |
-
"
|
79089 |
-
"
|
79090 |
},
|
79091 |
"bits": [
|
79092 |
-
|
79093 |
-
|
79094 |
],
|
79095 |
"bits_prop": [
|
79096 |
0.1,
|
@@ -79100,45 +79112,39 @@
|
|
79100 |
},
|
79101 |
"up_proj": {
|
79102 |
"group_size": {
|
79103 |
-
"
|
79104 |
-
"
|
79105 |
},
|
79106 |
"bits": [
|
79107 |
-
|
79108 |
-
|
79109 |
],
|
79110 |
"bits_prop": [
|
79111 |
-
0.
|
79112 |
-
0.
|
79113 |
],
|
79114 |
"scale_bits": 4
|
79115 |
},
|
79116 |
"down_proj": {
|
79117 |
"group_size": {
|
79118 |
-
"8":
|
79119 |
-
"6": 32,
|
79120 |
-
"5": 32
|
79121 |
},
|
79122 |
"bits": [
|
79123 |
-
8
|
79124 |
-
6,
|
79125 |
-
5
|
79126 |
],
|
79127 |
"bits_prop": [
|
79128 |
-
|
79129 |
-
0.1,
|
79130 |
-
0.85
|
79131 |
],
|
79132 |
"scale_bits": 4
|
79133 |
}
|
79134 |
},
|
79135 |
"model.layers.10.self_attn": {
|
79136 |
-
"accuracy": 0.
|
79137 |
-
"total_bits":
|
79138 |
"q_proj": {
|
79139 |
"group_size": {
|
79140 |
-
"6":
|
79141 |
-
"5":
|
79142 |
},
|
79143 |
"bits": [
|
79144 |
6,
|
@@ -79152,8 +79158,8 @@
|
|
79152 |
},
|
79153 |
"k_proj": {
|
79154 |
"group_size": {
|
79155 |
-
"6":
|
79156 |
-
"5":
|
79157 |
},
|
79158 |
"bits": [
|
79159 |
6,
|
@@ -79167,7 +79173,7 @@
|
|
79167 |
},
|
79168 |
"v_proj": {
|
79169 |
"group_size": {
|
79170 |
-
"6":
|
79171 |
},
|
79172 |
"bits": [
|
79173 |
6
|
@@ -79179,8 +79185,8 @@
|
|
79179 |
},
|
79180 |
"o_proj": {
|
79181 |
"group_size": {
|
79182 |
-
"6":
|
79183 |
-
"5":
|
79184 |
},
|
79185 |
"bits": [
|
79186 |
6,
|
@@ -79194,90 +79200,87 @@
|
|
79194 |
}
|
79195 |
},
|
79196 |
"model.layers.10.mlp": {
|
79197 |
-
"accuracy": 0.
|
79198 |
-
"total_bits":
|
79199 |
"gate_proj": {
|
79200 |
"group_size": {
|
79201 |
-
"6":
|
79202 |
-
"5": 32
|
79203 |
},
|
79204 |
"bits": [
|
79205 |
-
6
|
79206 |
-
5
|
79207 |
],
|
79208 |
"bits_prop": [
|
79209 |
-
|
79210 |
-
0.9
|
79211 |
],
|
79212 |
"scale_bits": 4
|
79213 |
},
|
79214 |
"up_proj": {
|
79215 |
"group_size": {
|
79216 |
-
"6":
|
79217 |
-
"5": 32
|
79218 |
},
|
79219 |
"bits": [
|
79220 |
-
6
|
79221 |
-
5
|
79222 |
],
|
79223 |
"bits_prop": [
|
79224 |
-
|
79225 |
-
0.75
|
79226 |
],
|
79227 |
"scale_bits": 4
|
79228 |
},
|
79229 |
"down_proj": {
|
79230 |
"group_size": {
|
79231 |
"8": 32,
|
79232 |
-
"6":
|
79233 |
-
"5": 32
|
79234 |
},
|
79235 |
"bits": [
|
79236 |
8,
|
79237 |
-
6
|
79238 |
-
5
|
79239 |
],
|
79240 |
"bits_prop": [
|
79241 |
0.05,
|
79242 |
-
0.
|
79243 |
-
0.85
|
79244 |
],
|
79245 |
"scale_bits": 4
|
79246 |
}
|
79247 |
},
|
79248 |
"model.layers.11.self_attn": {
|
79249 |
-
"accuracy": 0.
|
79250 |
-
"total_bits":
|
79251 |
"q_proj": {
|
79252 |
"group_size": {
|
79253 |
-
"6": 32
|
|
|
79254 |
},
|
79255 |
"bits": [
|
79256 |
-
6
|
|
|
79257 |
],
|
79258 |
"bits_prop": [
|
79259 |
-
1
|
|
|
79260 |
],
|
79261 |
"scale_bits": 4
|
79262 |
},
|
79263 |
"k_proj": {
|
79264 |
"group_size": {
|
79265 |
-
"6": 32
|
|
|
79266 |
},
|
79267 |
"bits": [
|
79268 |
-
6
|
|
|
79269 |
],
|
79270 |
"bits_prop": [
|
79271 |
-
1
|
|
|
79272 |
],
|
79273 |
"scale_bits": 4
|
79274 |
},
|
79275 |
"v_proj": {
|
79276 |
"group_size": {
|
79277 |
-
"
|
79278 |
},
|
79279 |
"bits": [
|
79280 |
-
|
79281 |
],
|
79282 |
"bits_prop": [
|
79283 |
1
|
@@ -79286,20 +79289,23 @@
|
|
79286 |
},
|
79287 |
"o_proj": {
|
79288 |
"group_size": {
|
79289 |
-
"6": 32
|
|
|
79290 |
},
|
79291 |
"bits": [
|
79292 |
-
6
|
|
|
79293 |
],
|
79294 |
"bits_prop": [
|
79295 |
-
1
|
|
|
79296 |
],
|
79297 |
"scale_bits": 4
|
79298 |
}
|
79299 |
},
|
79300 |
"model.layers.11.mlp": {
|
79301 |
-
"accuracy": 0.
|
79302 |
-
"total_bits":
|
79303 |
"gate_proj": {
|
79304 |
"group_size": {
|
79305 |
"8": 128,
|
@@ -79332,29 +79338,26 @@
|
|
79332 |
},
|
79333 |
"down_proj": {
|
79334 |
"group_size": {
|
79335 |
-
"8": 128
|
79336 |
-
"6": 128
|
79337 |
},
|
79338 |
"bits": [
|
79339 |
-
8
|
79340 |
-
6
|
79341 |
],
|
79342 |
"bits_prop": [
|
79343 |
-
|
79344 |
-
0.85
|
79345 |
],
|
79346 |
"scale_bits": 4
|
79347 |
}
|
79348 |
},
|
79349 |
"model.layers.12.self_attn": {
|
79350 |
-
"accuracy": 0.
|
79351 |
-
"total_bits":
|
79352 |
"q_proj": {
|
79353 |
"group_size": {
|
79354 |
-
"
|
79355 |
},
|
79356 |
"bits": [
|
79357 |
-
|
79358 |
],
|
79359 |
"bits_prop": [
|
79360 |
1
|
@@ -79363,10 +79366,10 @@
|
|
79363 |
},
|
79364 |
"k_proj": {
|
79365 |
"group_size": {
|
79366 |
-
"
|
79367 |
},
|
79368 |
"bits": [
|
79369 |
-
|
79370 |
],
|
79371 |
"bits_prop": [
|
79372 |
1
|
@@ -79375,7 +79378,7 @@
|
|
79375 |
},
|
79376 |
"v_proj": {
|
79377 |
"group_size": {
|
79378 |
-
"8":
|
79379 |
},
|
79380 |
"bits": [
|
79381 |
8
|
@@ -79387,10 +79390,10 @@
|
|
79387 |
},
|
79388 |
"o_proj": {
|
79389 |
"group_size": {
|
79390 |
-
"
|
79391 |
},
|
79392 |
"bits": [
|
79393 |
-
|
79394 |
],
|
79395 |
"bits_prop": [
|
79396 |
1
|
@@ -79399,8 +79402,8 @@
|
|
79399 |
}
|
79400 |
},
|
79401 |
"model.layers.12.mlp": {
|
79402 |
-
"accuracy": 0.
|
79403 |
-
"total_bits":
|
79404 |
"gate_proj": {
|
79405 |
"group_size": {
|
79406 |
"8": 128,
|
@@ -79432,61 +79435,6 @@
|
|
79432 |
"scale_bits": 4
|
79433 |
},
|
79434 |
"down_proj": {
|
79435 |
-
"group_size": {
|
79436 |
-
"8": 128,
|
79437 |
-
"6": 128
|
79438 |
-
},
|
79439 |
-
"bits": [
|
79440 |
-
8,
|
79441 |
-
6
|
79442 |
-
],
|
79443 |
-
"bits_prop": [
|
79444 |
-
0.15,
|
79445 |
-
0.85
|
79446 |
-
],
|
79447 |
-
"scale_bits": 4
|
79448 |
-
}
|
79449 |
-
},
|
79450 |
-
"model.layers.13.self_attn": {
|
79451 |
-
"accuracy": 0.998663023696281,
|
79452 |
-
"total_bits": 526999040,
|
79453 |
-
"q_proj": {
|
79454 |
-
"group_size": {
|
79455 |
-
"8": 128
|
79456 |
-
},
|
79457 |
-
"bits": [
|
79458 |
-
8
|
79459 |
-
],
|
79460 |
-
"bits_prop": [
|
79461 |
-
1
|
79462 |
-
],
|
79463 |
-
"scale_bits": 4
|
79464 |
-
},
|
79465 |
-
"k_proj": {
|
79466 |
-
"group_size": {
|
79467 |
-
"8": 128
|
79468 |
-
},
|
79469 |
-
"bits": [
|
79470 |
-
8
|
79471 |
-
],
|
79472 |
-
"bits_prop": [
|
79473 |
-
1
|
79474 |
-
],
|
79475 |
-
"scale_bits": 4
|
79476 |
-
},
|
79477 |
-
"v_proj": {
|
79478 |
-
"group_size": {
|
79479 |
-
"8": 128
|
79480 |
-
},
|
79481 |
-
"bits": [
|
79482 |
-
8
|
79483 |
-
],
|
79484 |
-
"bits_prop": [
|
79485 |
-
1
|
79486 |
-
],
|
79487 |
-
"scale_bits": 4
|
79488 |
-
},
|
79489 |
-
"o_proj": {
|
79490 |
"group_size": {
|
79491 |
"8": 128
|
79492 |
},
|
@@ -79499,10 +79447,10 @@
|
|
79499 |
"scale_bits": 4
|
79500 |
}
|
79501 |
},
|
79502 |
-
"model.layers.13.
|
79503 |
-
"accuracy": 0.
|
79504 |
-
"total_bits":
|
79505 |
-
"
|
79506 |
"group_size": {
|
79507 |
"6": 128
|
79508 |
},
|
@@ -79514,7 +79462,7 @@
|
|
79514 |
],
|
79515 |
"scale_bits": 4
|
79516 |
},
|
79517 |
-
"
|
79518 |
"group_size": {
|
79519 |
"6": 128
|
79520 |
},
|
@@ -79526,29 +79474,10 @@
|
|
79526 |
],
|
79527 |
"scale_bits": 4
|
79528 |
},
|
79529 |
-
"
|
79530 |
"group_size": {
|
79531 |
-
"8": 32,
|
79532 |
"6": 128
|
79533 |
},
|
79534 |
-
"bits": [
|
79535 |
-
8,
|
79536 |
-
6
|
79537 |
-
],
|
79538 |
-
"bits_prop": [
|
79539 |
-
0.05,
|
79540 |
-
0.95
|
79541 |
-
],
|
79542 |
-
"scale_bits": 4
|
79543 |
-
}
|
79544 |
-
},
|
79545 |
-
"model.layers.14.self_attn": {
|
79546 |
-
"accuracy": 0.9961008524987847,
|
79547 |
-
"total_bits": 415201280,
|
79548 |
-
"q_proj": {
|
79549 |
-
"group_size": {
|
79550 |
-
"6": 32
|
79551 |
-
},
|
79552 |
"bits": [
|
79553 |
6
|
79554 |
],
|
@@ -79557,33 +79486,9 @@
|
|
79557 |
],
|
79558 |
"scale_bits": 4
|
79559 |
},
|
79560 |
-
"k_proj": {
|
79561 |
-
"group_size": {
|
79562 |
-
"6": 32
|
79563 |
-
},
|
79564 |
-
"bits": [
|
79565 |
-
6
|
79566 |
-
],
|
79567 |
-
"bits_prop": [
|
79568 |
-
1
|
79569 |
-
],
|
79570 |
-
"scale_bits": 4
|
79571 |
-
},
|
79572 |
-
"v_proj": {
|
79573 |
-
"group_size": {
|
79574 |
-
"8": 32
|
79575 |
-
},
|
79576 |
-
"bits": [
|
79577 |
-
8
|
79578 |
-
],
|
79579 |
-
"bits_prop": [
|
79580 |
-
1
|
79581 |
-
],
|
79582 |
-
"scale_bits": 4
|
79583 |
-
},
|
79584 |
"o_proj": {
|
79585 |
"group_size": {
|
79586 |
-
"6":
|
79587 |
},
|
79588 |
"bits": [
|
79589 |
6
|
@@ -79594,9 +79499,9 @@
|
|
79594 |
"scale_bits": 4
|
79595 |
}
|
79596 |
},
|
79597 |
-
"model.layers.
|
79598 |
-
"accuracy": 0.
|
79599 |
-
"total_bits":
|
79600 |
"gate_proj": {
|
79601 |
"group_size": {
|
79602 |
"8": 128,
|
@@ -79629,22 +79534,19 @@
|
|
79629 |
},
|
79630 |
"down_proj": {
|
79631 |
"group_size": {
|
79632 |
-
"8": 128
|
79633 |
-
"6": 128
|
79634 |
},
|
79635 |
"bits": [
|
79636 |
-
8
|
79637 |
-
6
|
79638 |
],
|
79639 |
"bits_prop": [
|
79640 |
-
|
79641 |
-
0.85
|
79642 |
],
|
79643 |
"scale_bits": 4
|
79644 |
}
|
79645 |
},
|
79646 |
-
"model.layers.
|
79647 |
-
"accuracy": 0.
|
79648 |
"total_bits": 415201280,
|
79649 |
"q_proj": {
|
79650 |
"group_size": {
|
@@ -79695,51 +79597,60 @@
|
|
79695 |
"scale_bits": 4
|
79696 |
}
|
79697 |
},
|
79698 |
-
"model.layers.
|
79699 |
-
"accuracy": 0.
|
79700 |
-
"total_bits":
|
79701 |
"gate_proj": {
|
79702 |
"group_size": {
|
79703 |
-
"6":
|
|
|
79704 |
},
|
79705 |
"bits": [
|
79706 |
-
6
|
|
|
79707 |
],
|
79708 |
"bits_prop": [
|
79709 |
-
1
|
|
|
79710 |
],
|
79711 |
"scale_bits": 4
|
79712 |
},
|
79713 |
"up_proj": {
|
79714 |
"group_size": {
|
79715 |
-
"6":
|
|
|
79716 |
},
|
79717 |
"bits": [
|
79718 |
-
6
|
|
|
79719 |
],
|
79720 |
"bits_prop": [
|
79721 |
-
|
|
|
79722 |
],
|
79723 |
"scale_bits": 4
|
79724 |
},
|
79725 |
"down_proj": {
|
79726 |
"group_size": {
|
79727 |
"8": 32,
|
79728 |
-
"6":
|
|
|
79729 |
},
|
79730 |
"bits": [
|
79731 |
8,
|
79732 |
-
6
|
|
|
79733 |
],
|
79734 |
"bits_prop": [
|
79735 |
0.05,
|
79736 |
-
0.
|
|
|
79737 |
],
|
79738 |
"scale_bits": 4
|
79739 |
}
|
79740 |
},
|
79741 |
-
"model.layers.
|
79742 |
-
"accuracy": 0.
|
79743 |
"total_bits": 526999040,
|
79744 |
"q_proj": {
|
79745 |
"group_size": {
|
@@ -79790,42 +79701,36 @@
|
|
79790 |
"scale_bits": 4
|
79791 |
}
|
79792 |
},
|
79793 |
-
"model.layers.
|
79794 |
-
"accuracy": 0.
|
79795 |
-
"total_bits":
|
79796 |
"gate_proj": {
|
79797 |
"group_size": {
|
79798 |
-
"8": 128,
|
79799 |
"6": 128
|
79800 |
},
|
79801 |
"bits": [
|
79802 |
-
8,
|
79803 |
6
|
79804 |
],
|
79805 |
"bits_prop": [
|
79806 |
-
|
79807 |
-
0.9
|
79808 |
],
|
79809 |
"scale_bits": 4
|
79810 |
},
|
79811 |
"up_proj": {
|
79812 |
"group_size": {
|
79813 |
-
"8": 128,
|
79814 |
"6": 128
|
79815 |
},
|
79816 |
"bits": [
|
79817 |
-
8,
|
79818 |
6
|
79819 |
],
|
79820 |
"bits_prop": [
|
79821 |
-
|
79822 |
-
0.9
|
79823 |
],
|
79824 |
"scale_bits": 4
|
79825 |
},
|
79826 |
"down_proj": {
|
79827 |
"group_size": {
|
79828 |
-
"8":
|
79829 |
"6": 128
|
79830 |
},
|
79831 |
"bits": [
|
@@ -79833,14 +79738,14 @@
|
|
79833 |
6
|
79834 |
],
|
79835 |
"bits_prop": [
|
79836 |
-
0.
|
79837 |
-
0.
|
79838 |
],
|
79839 |
"scale_bits": 4
|
79840 |
}
|
79841 |
},
|
79842 |
-
"model.layers.
|
79843 |
-
"accuracy": 0.
|
79844 |
"total_bits": 415201280,
|
79845 |
"q_proj": {
|
79846 |
"group_size": {
|
@@ -79891,8 +79796,8 @@
|
|
79891 |
"scale_bits": 4
|
79892 |
}
|
79893 |
},
|
79894 |
-
"model.layers.
|
79895 |
-
"accuracy": 0.
|
79896 |
"total_bits": 1881221440,
|
79897 |
"gate_proj": {
|
79898 |
"group_size": {
|
@@ -79937,15 +79842,15 @@
|
|
79937 |
"scale_bits": 4
|
79938 |
}
|
79939 |
},
|
79940 |
-
"model.layers.
|
79941 |
-
"accuracy": 0.
|
79942 |
-
"total_bits":
|
79943 |
"q_proj": {
|
79944 |
"group_size": {
|
79945 |
-
"
|
79946 |
},
|
79947 |
"bits": [
|
79948 |
-
|
79949 |
],
|
79950 |
"bits_prop": [
|
79951 |
1
|
@@ -79954,10 +79859,10 @@
|
|
79954 |
},
|
79955 |
"k_proj": {
|
79956 |
"group_size": {
|
79957 |
-
"
|
79958 |
},
|
79959 |
"bits": [
|
79960 |
-
|
79961 |
],
|
79962 |
"bits_prop": [
|
79963 |
1
|
@@ -79966,7 +79871,7 @@
|
|
79966 |
},
|
79967 |
"v_proj": {
|
79968 |
"group_size": {
|
79969 |
-
"8":
|
79970 |
},
|
79971 |
"bits": [
|
79972 |
8
|
@@ -79978,10 +79883,10 @@
|
|
79978 |
},
|
79979 |
"o_proj": {
|
79980 |
"group_size": {
|
79981 |
-
"
|
79982 |
},
|
79983 |
"bits": [
|
79984 |
-
|
79985 |
],
|
79986 |
"bits_prop": [
|
79987 |
1
|
@@ -79989,17 +79894,17 @@
|
|
79989 |
"scale_bits": 4
|
79990 |
}
|
79991 |
},
|
79992 |
-
"model.layers.
|
79993 |
-
"accuracy": 0.
|
79994 |
-
"total_bits":
|
79995 |
"gate_proj": {
|
79996 |
"group_size": {
|
79997 |
-
"
|
79998 |
-
"
|
79999 |
},
|
80000 |
"bits": [
|
80001 |
-
|
80002 |
-
|
80003 |
],
|
80004 |
"bits_prop": [
|
80005 |
0.1,
|
@@ -80009,41 +79914,44 @@
|
|
80009 |
},
|
80010 |
"up_proj": {
|
80011 |
"group_size": {
|
80012 |
-
"
|
80013 |
-
"
|
80014 |
},
|
80015 |
"bits": [
|
80016 |
-
|
80017 |
-
|
80018 |
],
|
80019 |
"bits_prop": [
|
80020 |
-
0.
|
80021 |
-
0.
|
80022 |
],
|
80023 |
"scale_bits": 4
|
80024 |
},
|
80025 |
"down_proj": {
|
80026 |
"group_size": {
|
80027 |
-
"8":
|
80028 |
-
"6":
|
|
|
80029 |
},
|
80030 |
"bits": [
|
80031 |
8,
|
80032 |
-
6
|
|
|
80033 |
],
|
80034 |
"bits_prop": [
|
80035 |
-
0.
|
|
|
80036 |
0.85
|
80037 |
],
|
80038 |
"scale_bits": 4
|
80039 |
}
|
80040 |
},
|
80041 |
-
"model.layers.
|
80042 |
-
"accuracy": 0.
|
80043 |
-
"total_bits":
|
80044 |
"q_proj": {
|
80045 |
"group_size": {
|
80046 |
-
"6":
|
80047 |
},
|
80048 |
"bits": [
|
80049 |
6
|
@@ -80055,7 +79963,7 @@
|
|
80055 |
},
|
80056 |
"k_proj": {
|
80057 |
"group_size": {
|
80058 |
-
"6":
|
80059 |
},
|
80060 |
"bits": [
|
80061 |
6
|
@@ -80067,10 +79975,10 @@
|
|
80067 |
},
|
80068 |
"v_proj": {
|
80069 |
"group_size": {
|
80070 |
-
"
|
80071 |
},
|
80072 |
"bits": [
|
80073 |
-
|
80074 |
],
|
80075 |
"bits_prop": [
|
80076 |
1
|
@@ -80079,7 +79987,7 @@
|
|
80079 |
},
|
80080 |
"o_proj": {
|
80081 |
"group_size": {
|
80082 |
-
"6":
|
80083 |
},
|
80084 |
"bits": [
|
80085 |
6
|
@@ -80090,8 +79998,8 @@
|
|
80090 |
"scale_bits": 4
|
80091 |
}
|
80092 |
},
|
80093 |
-
"model.layers.
|
80094 |
-
"accuracy": 0.
|
80095 |
"total_bits": 1670626608,
|
80096 |
"gate_proj": {
|
80097 |
"group_size": {
|
@@ -80133,8 +80041,8 @@
|
|
80133 |
"scale_bits": 4
|
80134 |
}
|
80135 |
},
|
80136 |
-
"model.layers.
|
80137 |
-
"accuracy": 0.
|
80138 |
"total_bits": 526999040,
|
80139 |
"q_proj": {
|
80140 |
"group_size": {
|
@@ -80185,8 +80093,8 @@
|
|
80185 |
"scale_bits": 4
|
80186 |
}
|
80187 |
},
|
80188 |
-
"model.layers.
|
80189 |
-
"accuracy": 0.
|
80190 |
"total_bits": 1725245760,
|
80191 |
"gate_proj": {
|
80192 |
"group_size": {
|
@@ -80234,39 +80142,45 @@
|
|
80234 |
"scale_bits": 4
|
80235 |
}
|
80236 |
},
|
80237 |
-
"model.layers.
|
80238 |
-
"accuracy": 0.
|
80239 |
-
"total_bits":
|
80240 |
"q_proj": {
|
80241 |
"group_size": {
|
80242 |
-
"6": 32
|
|
|
80243 |
},
|
80244 |
"bits": [
|
80245 |
-
6
|
|
|
80246 |
],
|
80247 |
"bits_prop": [
|
80248 |
-
1
|
|
|
80249 |
],
|
80250 |
"scale_bits": 4
|
80251 |
},
|
80252 |
"k_proj": {
|
80253 |
"group_size": {
|
80254 |
-
"6": 32
|
|
|
80255 |
},
|
80256 |
"bits": [
|
80257 |
-
6
|
|
|
80258 |
],
|
80259 |
"bits_prop": [
|
80260 |
-
1
|
|
|
80261 |
],
|
80262 |
"scale_bits": 4
|
80263 |
},
|
80264 |
"v_proj": {
|
80265 |
"group_size": {
|
80266 |
-
"
|
80267 |
},
|
80268 |
"bits": [
|
80269 |
-
|
80270 |
],
|
80271 |
"bits_prop": [
|
80272 |
1
|
@@ -80275,19 +80189,22 @@
|
|
80275 |
},
|
80276 |
"o_proj": {
|
80277 |
"group_size": {
|
80278 |
-
"6": 32
|
|
|
80279 |
},
|
80280 |
"bits": [
|
80281 |
-
6
|
|
|
80282 |
],
|
80283 |
"bits_prop": [
|
80284 |
-
1
|
|
|
80285 |
],
|
80286 |
"scale_bits": 4
|
80287 |
}
|
80288 |
},
|
80289 |
-
"model.layers.
|
80290 |
-
"accuracy": 0.
|
80291 |
"total_bits": 1670626608,
|
80292 |
"gate_proj": {
|
80293 |
"group_size": {
|
@@ -80329,8 +80246,8 @@
|
|
80329 |
"scale_bits": 4
|
80330 |
}
|
80331 |
},
|
80332 |
-
"model.layers.
|
80333 |
-
"accuracy": 0.
|
80334 |
"total_bits": 526999040,
|
80335 |
"q_proj": {
|
80336 |
"group_size": {
|
@@ -80381,6 +80298,104 @@
|
|
80381 |
"scale_bits": 4
|
80382 |
}
|
80383 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80384 |
"model.layers.22.mlp": {
|
80385 |
"accuracy": 0.9926852826029062,
|
80386 |
"total_bits": 1725245760,
|
@@ -80526,14 +80541,14 @@
|
|
80526 |
}
|
80527 |
},
|
80528 |
"model.layers.24.self_attn": {
|
80529 |
-
"accuracy": 0.
|
80530 |
-
"total_bits":
|
80531 |
"q_proj": {
|
80532 |
"group_size": {
|
80533 |
-
"
|
80534 |
},
|
80535 |
"bits": [
|
80536 |
-
|
80537 |
],
|
80538 |
"bits_prop": [
|
80539 |
1
|
@@ -80542,10 +80557,10 @@
|
|
80542 |
},
|
80543 |
"k_proj": {
|
80544 |
"group_size": {
|
80545 |
-
"
|
80546 |
},
|
80547 |
"bits": [
|
80548 |
-
|
80549 |
],
|
80550 |
"bits_prop": [
|
80551 |
1
|
@@ -80554,7 +80569,7 @@
|
|
80554 |
},
|
80555 |
"v_proj": {
|
80556 |
"group_size": {
|
80557 |
-
"8":
|
80558 |
},
|
80559 |
"bits": [
|
80560 |
8
|
@@ -80566,10 +80581,10 @@
|
|
80566 |
},
|
80567 |
"o_proj": {
|
80568 |
"group_size": {
|
80569 |
-
"
|
80570 |
},
|
80571 |
"bits": [
|
80572 |
-
|
80573 |
],
|
80574 |
"bits_prop": [
|
80575 |
1
|
@@ -80621,14 +80636,14 @@
|
|
80621 |
}
|
80622 |
},
|
80623 |
"model.layers.25.self_attn": {
|
80624 |
-
"accuracy": 0.
|
80625 |
-
"total_bits":
|
80626 |
"q_proj": {
|
80627 |
"group_size": {
|
80628 |
-
"
|
80629 |
},
|
80630 |
"bits": [
|
80631 |
-
|
80632 |
],
|
80633 |
"bits_prop": [
|
80634 |
1
|
@@ -80637,10 +80652,10 @@
|
|
80637 |
},
|
80638 |
"k_proj": {
|
80639 |
"group_size": {
|
80640 |
-
"
|
80641 |
},
|
80642 |
"bits": [
|
80643 |
-
|
80644 |
],
|
80645 |
"bits_prop": [
|
80646 |
1
|
@@ -80649,7 +80664,7 @@
|
|
80649 |
},
|
80650 |
"v_proj": {
|
80651 |
"group_size": {
|
80652 |
-
"8":
|
80653 |
},
|
80654 |
"bits": [
|
80655 |
8
|
@@ -80660,6 +80675,52 @@
|
|
80660 |
"scale_bits": 4
|
80661 |
},
|
80662 |
"o_proj": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80663 |
"group_size": {
|
80664 |
"8": 128
|
80665 |
},
|
@@ -80672,8 +80733,69 @@
|
|
80672 |
"scale_bits": 4
|
80673 |
}
|
80674 |
},
|
80675 |
-
"model.layers.
|
80676 |
-
"accuracy": 0.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80677 |
"total_bits": 1670626608,
|
80678 |
"gate_proj": {
|
80679 |
"group_size": {
|
@@ -80715,39 +80837,45 @@
|
|
80715 |
"scale_bits": 4
|
80716 |
}
|
80717 |
},
|
80718 |
-
"model.layers.
|
80719 |
-
"accuracy": 0.
|
80720 |
-
"total_bits":
|
80721 |
"q_proj": {
|
80722 |
"group_size": {
|
80723 |
-
"
|
|
|
80724 |
},
|
80725 |
"bits": [
|
80726 |
-
|
|
|
80727 |
],
|
80728 |
"bits_prop": [
|
80729 |
-
1
|
|
|
80730 |
],
|
80731 |
"scale_bits": 4
|
80732 |
},
|
80733 |
"k_proj": {
|
80734 |
"group_size": {
|
80735 |
-
"
|
|
|
80736 |
},
|
80737 |
"bits": [
|
80738 |
-
|
|
|
80739 |
],
|
80740 |
"bits_prop": [
|
80741 |
-
1
|
|
|
80742 |
],
|
80743 |
"scale_bits": 4
|
80744 |
},
|
80745 |
"v_proj": {
|
80746 |
"group_size": {
|
80747 |
-
"
|
80748 |
},
|
80749 |
"bits": [
|
80750 |
-
|
80751 |
],
|
80752 |
"bits_prop": [
|
80753 |
1
|
@@ -80756,19 +80884,22 @@
|
|
80756 |
},
|
80757 |
"o_proj": {
|
80758 |
"group_size": {
|
80759 |
-
"
|
|
|
80760 |
},
|
80761 |
"bits": [
|
80762 |
-
|
|
|
80763 |
],
|
80764 |
"bits_prop": [
|
80765 |
-
1
|
|
|
80766 |
],
|
80767 |
"scale_bits": 4
|
80768 |
}
|
80769 |
},
|
80770 |
-
"model.layers.
|
80771 |
-
"accuracy": 0.
|
80772 |
"total_bits": 1670626608,
|
80773 |
"gate_proj": {
|
80774 |
"group_size": {
|
@@ -80810,12 +80941,12 @@
|
|
80810 |
"scale_bits": 4
|
80811 |
}
|
80812 |
},
|
80813 |
-
"model.layers.
|
80814 |
-
"accuracy": 0.
|
80815 |
-
"total_bits":
|
80816 |
"q_proj": {
|
80817 |
"group_size": {
|
80818 |
-
"6":
|
80819 |
},
|
80820 |
"bits": [
|
80821 |
6
|
@@ -80827,7 +80958,7 @@
|
|
80827 |
},
|
80828 |
"k_proj": {
|
80829 |
"group_size": {
|
80830 |
-
"6":
|
80831 |
},
|
80832 |
"bits": [
|
80833 |
6
|
@@ -80839,10 +80970,10 @@
|
|
80839 |
},
|
80840 |
"v_proj": {
|
80841 |
"group_size": {
|
80842 |
-
"
|
80843 |
},
|
80844 |
"bits": [
|
80845 |
-
|
80846 |
],
|
80847 |
"bits_prop": [
|
80848 |
1
|
@@ -80851,7 +80982,7 @@
|
|
80851 |
},
|
80852 |
"o_proj": {
|
80853 |
"group_size": {
|
80854 |
-
"6":
|
80855 |
},
|
80856 |
"bits": [
|
80857 |
6
|
@@ -80862,54 +80993,51 @@
|
|
80862 |
"scale_bits": 4
|
80863 |
}
|
80864 |
},
|
80865 |
-
"model.layers.
|
80866 |
-
"accuracy": 0.
|
80867 |
-
"total_bits":
|
80868 |
"gate_proj": {
|
80869 |
"group_size": {
|
80870 |
-
"8": 128,
|
80871 |
"6": 128
|
80872 |
},
|
80873 |
"bits": [
|
80874 |
-
8,
|
80875 |
6
|
80876 |
],
|
80877 |
"bits_prop": [
|
80878 |
-
|
80879 |
-
0.9
|
80880 |
],
|
80881 |
"scale_bits": 4
|
80882 |
},
|
80883 |
"up_proj": {
|
80884 |
"group_size": {
|
80885 |
-
"8": 128,
|
80886 |
"6": 128
|
80887 |
},
|
80888 |
"bits": [
|
80889 |
-
8,
|
80890 |
6
|
80891 |
],
|
80892 |
"bits_prop": [
|
80893 |
-
|
80894 |
-
0.9
|
80895 |
],
|
80896 |
"scale_bits": 4
|
80897 |
},
|
80898 |
"down_proj": {
|
80899 |
"group_size": {
|
80900 |
-
"8":
|
|
|
80901 |
},
|
80902 |
"bits": [
|
80903 |
-
8
|
|
|
80904 |
],
|
80905 |
"bits_prop": [
|
80906 |
-
|
|
|
80907 |
],
|
80908 |
"scale_bits": 4
|
80909 |
}
|
80910 |
},
|
80911 |
-
"model.layers.
|
80912 |
-
"accuracy": 0.
|
80913 |
"total_bits": 415201280,
|
80914 |
"q_proj": {
|
80915 |
"group_size": {
|
@@ -80960,51 +81088,60 @@
|
|
80960 |
"scale_bits": 4
|
80961 |
}
|
80962 |
},
|
80963 |
-
"model.layers.
|
80964 |
-
"accuracy": 0.
|
80965 |
-
"total_bits":
|
80966 |
"gate_proj": {
|
80967 |
"group_size": {
|
80968 |
-
"6": 128
|
|
|
80969 |
},
|
80970 |
"bits": [
|
80971 |
-
6
|
|
|
80972 |
],
|
80973 |
"bits_prop": [
|
80974 |
-
1
|
|
|
80975 |
],
|
80976 |
"scale_bits": 4
|
80977 |
},
|
80978 |
"up_proj": {
|
80979 |
"group_size": {
|
80980 |
-
"6": 128
|
|
|
80981 |
},
|
80982 |
"bits": [
|
80983 |
-
6
|
|
|
80984 |
],
|
80985 |
"bits_prop": [
|
80986 |
-
|
|
|
80987 |
],
|
80988 |
"scale_bits": 4
|
80989 |
},
|
80990 |
"down_proj": {
|
80991 |
"group_size": {
|
80992 |
"8": 32,
|
80993 |
-
"6": 128
|
|
|
80994 |
},
|
80995 |
"bits": [
|
80996 |
8,
|
80997 |
-
6
|
|
|
80998 |
],
|
80999 |
"bits_prop": [
|
81000 |
0.05,
|
81001 |
-
0.
|
|
|
81002 |
],
|
81003 |
"scale_bits": 4
|
81004 |
}
|
81005 |
},
|
81006 |
-
"model.layers.
|
81007 |
-
"accuracy": 0.
|
81008 |
"total_bits": 415201280,
|
81009 |
"q_proj": {
|
81010 |
"group_size": {
|
@@ -81055,64 +81192,55 @@
|
|
81055 |
"scale_bits": 4
|
81056 |
}
|
81057 |
},
|
81058 |
-
"model.layers.
|
81059 |
-
"accuracy": 0.
|
81060 |
-
"total_bits":
|
81061 |
"gate_proj": {
|
81062 |
"group_size": {
|
81063 |
-
"6":
|
81064 |
-
"5": 32
|
81065 |
},
|
81066 |
"bits": [
|
81067 |
-
6
|
81068 |
-
5
|
81069 |
],
|
81070 |
"bits_prop": [
|
81071 |
-
|
81072 |
-
0.9
|
81073 |
],
|
81074 |
"scale_bits": 4
|
81075 |
},
|
81076 |
"up_proj": {
|
81077 |
"group_size": {
|
81078 |
-
"6":
|
81079 |
-
"5": 32
|
81080 |
},
|
81081 |
"bits": [
|
81082 |
-
6
|
81083 |
-
5
|
81084 |
],
|
81085 |
"bits_prop": [
|
81086 |
-
|
81087 |
-
0.75
|
81088 |
],
|
81089 |
"scale_bits": 4
|
81090 |
},
|
81091 |
"down_proj": {
|
81092 |
"group_size": {
|
81093 |
"8": 32,
|
81094 |
-
"6":
|
81095 |
-
"5": 32
|
81096 |
},
|
81097 |
"bits": [
|
81098 |
8,
|
81099 |
-
6
|
81100 |
-
5
|
81101 |
],
|
81102 |
"bits_prop": [
|
81103 |
0.05,
|
81104 |
-
0.
|
81105 |
-
0.85
|
81106 |
],
|
81107 |
"scale_bits": 4
|
81108 |
}
|
81109 |
},
|
81110 |
-
"model.layers.
|
81111 |
-
"accuracy": 0.
|
81112 |
-
"total_bits":
|
81113 |
"q_proj": {
|
81114 |
"group_size": {
|
81115 |
-
"6":
|
81116 |
},
|
81117 |
"bits": [
|
81118 |
6
|
@@ -81124,7 +81252,7 @@
|
|
81124 |
},
|
81125 |
"k_proj": {
|
81126 |
"group_size": {
|
81127 |
-
"6":
|
81128 |
},
|
81129 |
"bits": [
|
81130 |
6
|
@@ -81136,10 +81264,10 @@
|
|
81136 |
},
|
81137 |
"v_proj": {
|
81138 |
"group_size": {
|
81139 |
-
"
|
81140 |
},
|
81141 |
"bits": [
|
81142 |
-
|
81143 |
],
|
81144 |
"bits_prop": [
|
81145 |
1
|
@@ -81148,7 +81276,7 @@
|
|
81148 |
},
|
81149 |
"o_proj": {
|
81150 |
"group_size": {
|
81151 |
-
"6":
|
81152 |
},
|
81153 |
"bits": [
|
81154 |
6
|
@@ -81159,91 +81287,88 @@
|
|
81159 |
"scale_bits": 4
|
81160 |
}
|
81161 |
},
|
81162 |
-
"model.layers.
|
81163 |
-
"accuracy": 0.
|
81164 |
-
"total_bits":
|
81165 |
"gate_proj": {
|
81166 |
"group_size": {
|
81167 |
-
"6":
|
81168 |
-
"5": 32
|
81169 |
},
|
81170 |
"bits": [
|
81171 |
-
6
|
81172 |
-
5
|
81173 |
],
|
81174 |
"bits_prop": [
|
81175 |
-
|
81176 |
-
0.9
|
81177 |
],
|
81178 |
"scale_bits": 4
|
81179 |
},
|
81180 |
"up_proj": {
|
81181 |
"group_size": {
|
81182 |
-
"6":
|
81183 |
-
"5": 32
|
81184 |
},
|
81185 |
"bits": [
|
81186 |
-
6
|
81187 |
-
5
|
81188 |
],
|
81189 |
"bits_prop": [
|
81190 |
-
|
81191 |
-
0.75
|
81192 |
],
|
81193 |
"scale_bits": 4
|
81194 |
},
|
81195 |
"down_proj": {
|
81196 |
"group_size": {
|
81197 |
"8": 32,
|
81198 |
-
"6":
|
81199 |
-
"5": 32
|
81200 |
},
|
81201 |
"bits": [
|
81202 |
8,
|
81203 |
-
6
|
81204 |
-
5
|
81205 |
],
|
81206 |
"bits_prop": [
|
81207 |
0.05,
|
81208 |
-
0.
|
81209 |
-
0.85
|
81210 |
],
|
81211 |
"scale_bits": 4
|
81212 |
}
|
81213 |
},
|
81214 |
-
"model.layers.
|
81215 |
-
"accuracy": 0.
|
81216 |
-
"total_bits":
|
81217 |
"q_proj": {
|
81218 |
"group_size": {
|
81219 |
-
"6":
|
|
|
81220 |
},
|
81221 |
"bits": [
|
81222 |
-
6
|
|
|
81223 |
],
|
81224 |
"bits_prop": [
|
81225 |
-
1
|
|
|
81226 |
],
|
81227 |
"scale_bits": 4
|
81228 |
},
|
81229 |
"k_proj": {
|
81230 |
"group_size": {
|
81231 |
-
"6":
|
|
|
81232 |
},
|
81233 |
"bits": [
|
81234 |
-
6
|
|
|
81235 |
],
|
81236 |
"bits_prop": [
|
81237 |
-
1
|
|
|
81238 |
],
|
81239 |
"scale_bits": 4
|
81240 |
},
|
81241 |
"v_proj": {
|
81242 |
"group_size": {
|
81243 |
-
"
|
81244 |
},
|
81245 |
"bits": [
|
81246 |
-
|
81247 |
],
|
81248 |
"bits_prop": [
|
81249 |
1
|
@@ -81252,62 +81377,74 @@
|
|
81252 |
},
|
81253 |
"o_proj": {
|
81254 |
"group_size": {
|
81255 |
-
"6":
|
|
|
81256 |
},
|
81257 |
"bits": [
|
81258 |
-
6
|
|
|
81259 |
],
|
81260 |
"bits_prop": [
|
81261 |
-
1
|
|
|
81262 |
],
|
81263 |
"scale_bits": 4
|
81264 |
}
|
81265 |
},
|
81266 |
-
"model.layers.
|
81267 |
-
"accuracy": 0.
|
81268 |
-
"total_bits":
|
81269 |
"gate_proj": {
|
81270 |
"group_size": {
|
81271 |
-
"6": 128
|
|
|
81272 |
},
|
81273 |
"bits": [
|
81274 |
-
6
|
|
|
81275 |
],
|
81276 |
"bits_prop": [
|
81277 |
-
1
|
|
|
81278 |
],
|
81279 |
"scale_bits": 4
|
81280 |
},
|
81281 |
"up_proj": {
|
81282 |
"group_size": {
|
81283 |
-
"6": 128
|
|
|
81284 |
},
|
81285 |
"bits": [
|
81286 |
-
6
|
|
|
81287 |
],
|
81288 |
"bits_prop": [
|
81289 |
-
|
|
|
81290 |
],
|
81291 |
"scale_bits": 4
|
81292 |
},
|
81293 |
"down_proj": {
|
81294 |
"group_size": {
|
81295 |
"8": 32,
|
81296 |
-
"6": 128
|
|
|
81297 |
},
|
81298 |
"bits": [
|
81299 |
8,
|
81300 |
-
6
|
|
|
81301 |
],
|
81302 |
"bits_prop": [
|
81303 |
0.05,
|
81304 |
-
0.
|
|
|
81305 |
],
|
81306 |
"scale_bits": 4
|
81307 |
}
|
81308 |
},
|
81309 |
-
"model.layers.
|
81310 |
-
"accuracy": 0.
|
81311 |
"total_bits": 526999040,
|
81312 |
"q_proj": {
|
81313 |
"group_size": {
|
@@ -81358,36 +81495,42 @@
|
|
81358 |
"scale_bits": 4
|
81359 |
}
|
81360 |
},
|
81361 |
-
"model.layers.
|
81362 |
-
"accuracy": 0.
|
81363 |
-
"total_bits":
|
81364 |
"gate_proj": {
|
81365 |
"group_size": {
|
|
|
81366 |
"6": 128
|
81367 |
},
|
81368 |
"bits": [
|
|
|
81369 |
6
|
81370 |
],
|
81371 |
"bits_prop": [
|
81372 |
-
1
|
|
|
81373 |
],
|
81374 |
"scale_bits": 4
|
81375 |
},
|
81376 |
"up_proj": {
|
81377 |
"group_size": {
|
|
|
81378 |
"6": 128
|
81379 |
},
|
81380 |
"bits": [
|
|
|
81381 |
6
|
81382 |
],
|
81383 |
"bits_prop": [
|
81384 |
-
1
|
|
|
81385 |
],
|
81386 |
"scale_bits": 4
|
81387 |
},
|
81388 |
"down_proj": {
|
81389 |
"group_size": {
|
81390 |
-
"8":
|
81391 |
"6": 128
|
81392 |
},
|
81393 |
"bits": [
|
@@ -81395,45 +81538,51 @@
|
|
81395 |
6
|
81396 |
],
|
81397 |
"bits_prop": [
|
81398 |
-
0.
|
81399 |
-
0.
|
81400 |
],
|
81401 |
"scale_bits": 4
|
81402 |
}
|
81403 |
},
|
81404 |
-
"model.layers.
|
81405 |
-
"accuracy": 0.
|
81406 |
-
"total_bits":
|
81407 |
"q_proj": {
|
81408 |
"group_size": {
|
81409 |
-
"6": 32
|
|
|
81410 |
},
|
81411 |
"bits": [
|
81412 |
-
6
|
|
|
81413 |
],
|
81414 |
"bits_prop": [
|
81415 |
-
1
|
|
|
81416 |
],
|
81417 |
"scale_bits": 4
|
81418 |
},
|
81419 |
"k_proj": {
|
81420 |
"group_size": {
|
81421 |
-
"6": 32
|
|
|
81422 |
},
|
81423 |
"bits": [
|
81424 |
-
6
|
|
|
81425 |
],
|
81426 |
"bits_prop": [
|
81427 |
-
1
|
|
|
81428 |
],
|
81429 |
"scale_bits": 4
|
81430 |
},
|
81431 |
"v_proj": {
|
81432 |
"group_size": {
|
81433 |
-
"
|
81434 |
},
|
81435 |
"bits": [
|
81436 |
-
|
81437 |
],
|
81438 |
"bits_prop": [
|
81439 |
1
|
@@ -81442,108 +81591,96 @@
|
|
81442 |
},
|
81443 |
"o_proj": {
|
81444 |
"group_size": {
|
81445 |
-
"6": 32
|
|
|
81446 |
},
|
81447 |
"bits": [
|
81448 |
-
6
|
|
|
81449 |
],
|
81450 |
"bits_prop": [
|
81451 |
-
1
|
|
|
81452 |
],
|
81453 |
"scale_bits": 4
|
81454 |
}
|
81455 |
},
|
81456 |
-
"model.layers.
|
81457 |
-
"accuracy": 0.
|
81458 |
-
"total_bits":
|
81459 |
"gate_proj": {
|
81460 |
"group_size": {
|
81461 |
-
"6":
|
81462 |
-
"5": 32
|
81463 |
},
|
81464 |
"bits": [
|
81465 |
-
6
|
81466 |
-
5
|
81467 |
],
|
81468 |
"bits_prop": [
|
81469 |
-
|
81470 |
-
0.9
|
81471 |
],
|
81472 |
"scale_bits": 4
|
81473 |
},
|
81474 |
"up_proj": {
|
81475 |
"group_size": {
|
81476 |
-
"6":
|
81477 |
-
"5": 32
|
81478 |
},
|
81479 |
"bits": [
|
81480 |
-
6
|
81481 |
-
5
|
81482 |
],
|
81483 |
"bits_prop": [
|
81484 |
-
|
81485 |
-
0.75
|
81486 |
],
|
81487 |
"scale_bits": 4
|
81488 |
},
|
81489 |
"down_proj": {
|
81490 |
"group_size": {
|
81491 |
"8": 32,
|
81492 |
-
"6":
|
81493 |
-
"5": 32
|
81494 |
},
|
81495 |
"bits": [
|
81496 |
8,
|
81497 |
-
6
|
81498 |
-
5
|
81499 |
],
|
81500 |
"bits_prop": [
|
81501 |
0.05,
|
81502 |
-
0.
|
81503 |
-
0.85
|
81504 |
],
|
81505 |
"scale_bits": 4
|
81506 |
}
|
81507 |
},
|
81508 |
-
"model.layers.
|
81509 |
-
"accuracy": 0.
|
81510 |
-
"total_bits":
|
81511 |
"q_proj": {
|
81512 |
"group_size": {
|
81513 |
-
"
|
81514 |
-
"5": 32
|
81515 |
},
|
81516 |
"bits": [
|
81517 |
-
|
81518 |
-
5
|
81519 |
],
|
81520 |
"bits_prop": [
|
81521 |
-
|
81522 |
-
0.9
|
81523 |
],
|
81524 |
"scale_bits": 4
|
81525 |
},
|
81526 |
"k_proj": {
|
81527 |
"group_size": {
|
81528 |
-
"
|
81529 |
-
"5": 32
|
81530 |
},
|
81531 |
"bits": [
|
81532 |
-
|
81533 |
-
5
|
81534 |
],
|
81535 |
"bits_prop": [
|
81536 |
-
|
81537 |
-
0.9
|
81538 |
],
|
81539 |
"scale_bits": 4
|
81540 |
},
|
81541 |
"v_proj": {
|
81542 |
"group_size": {
|
81543 |
-
"
|
81544 |
},
|
81545 |
"bits": [
|
81546 |
-
|
81547 |
],
|
81548 |
"bits_prop": [
|
81549 |
1
|
@@ -81552,27 +81689,24 @@
|
|
81552 |
},
|
81553 |
"o_proj": {
|
81554 |
"group_size": {
|
81555 |
-
"
|
81556 |
-
"5": 32
|
81557 |
},
|
81558 |
"bits": [
|
81559 |
-
|
81560 |
-
5
|
81561 |
],
|
81562 |
"bits_prop": [
|
81563 |
-
|
81564 |
-
0.9
|
81565 |
],
|
81566 |
"scale_bits": 4
|
81567 |
}
|
81568 |
},
|
81569 |
-
"model.layers.
|
81570 |
-
"accuracy": 0.
|
81571 |
-
"total_bits":
|
81572 |
"gate_proj": {
|
81573 |
"group_size": {
|
81574 |
-
"6":
|
81575 |
-
"5":
|
81576 |
},
|
81577 |
"bits": [
|
81578 |
6,
|
@@ -81586,8 +81720,8 @@
|
|
81586 |
},
|
81587 |
"up_proj": {
|
81588 |
"group_size": {
|
81589 |
-
"6":
|
81590 |
-
"5":
|
81591 |
},
|
81592 |
"bits": [
|
81593 |
6,
|
@@ -81602,8 +81736,8 @@
|
|
81602 |
"down_proj": {
|
81603 |
"group_size": {
|
81604 |
"8": 32,
|
81605 |
-
"6":
|
81606 |
-
"5":
|
81607 |
},
|
81608 |
"bits": [
|
81609 |
8,
|
@@ -81618,8 +81752,8 @@
|
|
81618 |
"scale_bits": 4
|
81619 |
}
|
81620 |
},
|
81621 |
-
"model.layers.
|
81622 |
-
"accuracy": 0.
|
81623 |
"total_bits": 349009920,
|
81624 |
"q_proj": {
|
81625 |
"group_size": {
|
@@ -81679,108 +81813,13 @@
|
|
81679 |
"scale_bits": 4
|
81680 |
}
|
81681 |
},
|
81682 |
-
"model.layers.35.mlp": {
|
81683 |
-
"accuracy": 0.9944175193086267,
|
81684 |
-
"total_bits": 1670626608,
|
81685 |
-
"gate_proj": {
|
81686 |
-
"group_size": {
|
81687 |
-
"6": 128
|
81688 |
-
},
|
81689 |
-
"bits": [
|
81690 |
-
6
|
81691 |
-
],
|
81692 |
-
"bits_prop": [
|
81693 |
-
1
|
81694 |
-
],
|
81695 |
-
"scale_bits": 4
|
81696 |
-
},
|
81697 |
-
"up_proj": {
|
81698 |
-
"group_size": {
|
81699 |
-
"6": 128
|
81700 |
-
},
|
81701 |
-
"bits": [
|
81702 |
-
6
|
81703 |
-
],
|
81704 |
-
"bits_prop": [
|
81705 |
-
1
|
81706 |
-
],
|
81707 |
-
"scale_bits": 4
|
81708 |
-
},
|
81709 |
-
"down_proj": {
|
81710 |
-
"group_size": {
|
81711 |
-
"8": 32,
|
81712 |
-
"6": 128
|
81713 |
-
},
|
81714 |
-
"bits": [
|
81715 |
-
8,
|
81716 |
-
6
|
81717 |
-
],
|
81718 |
-
"bits_prop": [
|
81719 |
-
0.05,
|
81720 |
-
0.95
|
81721 |
-
],
|
81722 |
-
"scale_bits": 4
|
81723 |
-
}
|
81724 |
-
},
|
81725 |
-
"model.layers.36.self_attn": {
|
81726 |
-
"accuracy": 0.9982832411769778,
|
81727 |
-
"total_bits": 415201280,
|
81728 |
-
"q_proj": {
|
81729 |
-
"group_size": {
|
81730 |
-
"6": 32
|
81731 |
-
},
|
81732 |
-
"bits": [
|
81733 |
-
6
|
81734 |
-
],
|
81735 |
-
"bits_prop": [
|
81736 |
-
1
|
81737 |
-
],
|
81738 |
-
"scale_bits": 4
|
81739 |
-
},
|
81740 |
-
"k_proj": {
|
81741 |
-
"group_size": {
|
81742 |
-
"6": 32
|
81743 |
-
},
|
81744 |
-
"bits": [
|
81745 |
-
6
|
81746 |
-
],
|
81747 |
-
"bits_prop": [
|
81748 |
-
1
|
81749 |
-
],
|
81750 |
-
"scale_bits": 4
|
81751 |
-
},
|
81752 |
-
"v_proj": {
|
81753 |
-
"group_size": {
|
81754 |
-
"8": 32
|
81755 |
-
},
|
81756 |
-
"bits": [
|
81757 |
-
8
|
81758 |
-
],
|
81759 |
-
"bits_prop": [
|
81760 |
-
1
|
81761 |
-
],
|
81762 |
-
"scale_bits": 4
|
81763 |
-
},
|
81764 |
-
"o_proj": {
|
81765 |
-
"group_size": {
|
81766 |
-
"6": 32
|
81767 |
-
},
|
81768 |
-
"bits": [
|
81769 |
-
6
|
81770 |
-
],
|
81771 |
-
"bits_prop": [
|
81772 |
-
1
|
81773 |
-
],
|
81774 |
-
"scale_bits": 4
|
81775 |
-
}
|
81776 |
-
},
|
81777 |
"model.layers.36.mlp": {
|
81778 |
-
"accuracy": 0.
|
81779 |
-
"total_bits":
|
81780 |
"gate_proj": {
|
81781 |
"group_size": {
|
81782 |
-
"6":
|
81783 |
-
"5":
|
81784 |
},
|
81785 |
"bits": [
|
81786 |
6,
|
@@ -81794,8 +81833,8 @@
|
|
81794 |
},
|
81795 |
"up_proj": {
|
81796 |
"group_size": {
|
81797 |
-
"6":
|
81798 |
-
"5":
|
81799 |
},
|
81800 |
"bits": [
|
81801 |
6,
|
@@ -81810,8 +81849,8 @@
|
|
81810 |
"down_proj": {
|
81811 |
"group_size": {
|
81812 |
"8": 32,
|
81813 |
-
"6":
|
81814 |
-
"5":
|
81815 |
},
|
81816 |
"bits": [
|
81817 |
8,
|
@@ -81888,8 +81927,8 @@
|
|
81888 |
}
|
81889 |
},
|
81890 |
"model.layers.37.mlp": {
|
81891 |
-
"accuracy": 0.
|
81892 |
-
"total_bits":
|
81893 |
"gate_proj": {
|
81894 |
"group_size": {
|
81895 |
"8": 128,
|
@@ -81922,29 +81961,26 @@
|
|
81922 |
},
|
81923 |
"down_proj": {
|
81924 |
"group_size": {
|
81925 |
-
"8": 128
|
81926 |
-
"6": 128
|
81927 |
},
|
81928 |
"bits": [
|
81929 |
-
8
|
81930 |
-
6
|
81931 |
],
|
81932 |
"bits_prop": [
|
81933 |
-
|
81934 |
-
0.85
|
81935 |
],
|
81936 |
"scale_bits": 4
|
81937 |
}
|
81938 |
},
|
81939 |
"model.layers.38.self_attn": {
|
81940 |
-
"accuracy": 0.
|
81941 |
-
"total_bits":
|
81942 |
"q_proj": {
|
81943 |
"group_size": {
|
81944 |
-
"
|
81945 |
},
|
81946 |
"bits": [
|
81947 |
-
|
81948 |
],
|
81949 |
"bits_prop": [
|
81950 |
1
|
@@ -81953,10 +81989,10 @@
|
|
81953 |
},
|
81954 |
"k_proj": {
|
81955 |
"group_size": {
|
81956 |
-
"
|
81957 |
},
|
81958 |
"bits": [
|
81959 |
-
|
81960 |
],
|
81961 |
"bits_prop": [
|
81962 |
1
|
@@ -81965,7 +82001,7 @@
|
|
81965 |
},
|
81966 |
"v_proj": {
|
81967 |
"group_size": {
|
81968 |
-
"8":
|
81969 |
},
|
81970 |
"bits": [
|
81971 |
8
|
@@ -81977,10 +82013,10 @@
|
|
81977 |
},
|
81978 |
"o_proj": {
|
81979 |
"group_size": {
|
81980 |
-
"
|
81981 |
},
|
81982 |
"bits": [
|
81983 |
-
|
81984 |
],
|
81985 |
"bits_prop": [
|
81986 |
1
|
@@ -81989,12 +82025,12 @@
|
|
81989 |
}
|
81990 |
},
|
81991 |
"model.layers.38.mlp": {
|
81992 |
-
"accuracy": 0.
|
81993 |
-
"total_bits":
|
81994 |
"gate_proj": {
|
81995 |
"group_size": {
|
81996 |
-
"6":
|
81997 |
-
"5":
|
81998 |
},
|
81999 |
"bits": [
|
82000 |
6,
|
@@ -82008,8 +82044,8 @@
|
|
82008 |
},
|
82009 |
"up_proj": {
|
82010 |
"group_size": {
|
82011 |
-
"6":
|
82012 |
-
"5":
|
82013 |
},
|
82014 |
"bits": [
|
82015 |
6,
|
@@ -82024,8 +82060,8 @@
|
|
82024 |
"down_proj": {
|
82025 |
"group_size": {
|
82026 |
"8": 32,
|
82027 |
-
"6":
|
82028 |
-
"5":
|
82029 |
},
|
82030 |
"bits": [
|
82031 |
8,
|
@@ -82041,53 +82077,65 @@
|
|
82041 |
}
|
82042 |
},
|
82043 |
"model.layers.39.self_attn": {
|
82044 |
-
"accuracy": 0.
|
82045 |
-
"total_bits":
|
82046 |
"q_proj": {
|
82047 |
"group_size": {
|
|
|
82048 |
"4": 128
|
82049 |
},
|
82050 |
"bits": [
|
|
|
82051 |
4
|
82052 |
],
|
82053 |
"bits_prop": [
|
82054 |
-
1
|
|
|
82055 |
],
|
82056 |
"scale_bits": 4
|
82057 |
},
|
82058 |
"k_proj": {
|
82059 |
"group_size": {
|
|
|
82060 |
"4": 128
|
82061 |
},
|
82062 |
"bits": [
|
|
|
82063 |
4
|
82064 |
],
|
82065 |
"bits_prop": [
|
82066 |
-
1
|
|
|
82067 |
],
|
82068 |
"scale_bits": 4
|
82069 |
},
|
82070 |
"v_proj": {
|
82071 |
"group_size": {
|
82072 |
-
"
|
|
|
82073 |
},
|
82074 |
"bits": [
|
|
|
82075 |
4
|
82076 |
],
|
82077 |
"bits_prop": [
|
82078 |
-
1
|
|
|
82079 |
],
|
82080 |
"scale_bits": 4
|
82081 |
},
|
82082 |
"o_proj": {
|
82083 |
"group_size": {
|
|
|
82084 |
"4": 128
|
82085 |
},
|
82086 |
"bits": [
|
|
|
82087 |
4
|
82088 |
],
|
82089 |
"bits_prop": [
|
82090 |
-
1
|
|
|
82091 |
],
|
82092 |
"scale_bits": 4
|
82093 |
}
|
|
|
16952 |
}
|
16953 |
},
|
16954 |
{
|
16955 |
+
"accuracy": 0.9644066467881203,
|
16956 |
"total_bits": 899923248,
|
16957 |
"gate_proj": {
|
16958 |
"group_size": {
|
|
|
17056 |
}
|
17057 |
},
|
17058 |
{
|
17059 |
+
"accuracy": 0.9716205019503832,
|
17060 |
"total_bits": 995125808,
|
17061 |
"gate_proj": {
|
17062 |
"group_size": {
|
|
|
17105 |
}
|
17106 |
},
|
17107 |
{
|
17108 |
+
"accuracy": 0.9818738773465157,
|
17109 |
"total_bits": 1137906608,
|
17110 |
"gate_proj": {
|
17111 |
"group_size": {
|
|
|
17899 |
}
|
17900 |
},
|
17901 |
{
|
17902 |
+
"accuracy": 0.9735957626253366,
|
17903 |
"total_bits": 206079360,
|
17904 |
"q_proj": {
|
17905 |
"group_size": {
|
|
|
21545 |
}
|
21546 |
},
|
21547 |
{
|
21548 |
+
"accuracy": 0.9386431761085987,
|
21549 |
"total_bits": 143375360,
|
21550 |
"q_proj": {
|
21551 |
"group_size": {
|
|
|
24554 |
],
|
24555 |
"model.layers.12.mlp": [
|
24556 |
{
|
24557 |
+
"accuracy": 0.9032091200351715,
|
24558 |
"total_bits": 614790432,
|
24559 |
"gate_proj": {
|
24560 |
"group_size": {
|
|
|
24606 |
}
|
24607 |
},
|
24608 |
{
|
24609 |
+
"accuracy": 0.9056077301502228,
|
24610 |
"total_bits": 637728032,
|
24611 |
"gate_proj": {
|
24612 |
"group_size": {
|
|
|
46269 |
}
|
46270 |
},
|
46271 |
{
|
46272 |
+
"accuracy": 0.949563205242157,
|
46273 |
"total_bits": 925328640,
|
46274 |
"gate_proj": {
|
46275 |
"group_size": {
|
|
|
51868 |
],
|
51869 |
"model.layers.26.mlp": [
|
51870 |
{
|
51871 |
+
"accuracy": 0.8996819406747818,
|
51872 |
"total_bits": 614790432,
|
51873 |
"gate_proj": {
|
51874 |
"group_size": {
|
|
|
55136 |
}
|
55137 |
},
|
55138 |
{
|
55139 |
+
"accuracy": 0.9861949309706688,
|
55140 |
"total_bits": 267324160,
|
55141 |
"q_proj": {
|
55142 |
"group_size": {
|
|
|
55188 |
}
|
55189 |
},
|
55190 |
{
|
55191 |
+
"accuracy": 0.9868358941748738,
|
55192 |
"total_bits": 271022080,
|
55193 |
"q_proj": {
|
55194 |
"group_size": {
|
|
|
62992 |
}
|
62993 |
},
|
62994 |
{
|
62995 |
+
"accuracy": 0.9891389207914472,
|
62996 |
"total_bits": 271022080,
|
62997 |
"q_proj": {
|
62998 |
"group_size": {
|
|
|
64403 |
],
|
64404 |
"model.layers.33.self_attn": [
|
64405 |
{
|
64406 |
+
"accuracy": 0.9657053500413895,
|
64407 |
"total_bits": 139115520,
|
64408 |
"q_proj": {
|
64409 |
"group_size": {
|
|
|
78063 |
},
|
78064 |
"strategy": {
|
78065 |
"model.layers.0.self_attn": {
|
78066 |
+
"accuracy": 0.9954015873372555,
|
78067 |
+
"total_bits": 415201280,
|
78068 |
"q_proj": {
|
78069 |
"group_size": {
|
78070 |
+
"6": 32
|
78071 |
},
|
78072 |
"bits": [
|
78073 |
+
6
|
78074 |
],
|
78075 |
"bits_prop": [
|
78076 |
1
|
|
|
78079 |
},
|
78080 |
"k_proj": {
|
78081 |
"group_size": {
|
78082 |
+
"6": 32
|
78083 |
},
|
78084 |
"bits": [
|
78085 |
+
6
|
78086 |
],
|
78087 |
"bits_prop": [
|
78088 |
1
|
|
|
78091 |
},
|
78092 |
"v_proj": {
|
78093 |
"group_size": {
|
78094 |
+
"8": 32
|
78095 |
},
|
78096 |
"bits": [
|
78097 |
8
|
|
|
78103 |
},
|
78104 |
"o_proj": {
|
78105 |
"group_size": {
|
78106 |
+
"6": 32
|
78107 |
},
|
78108 |
"bits": [
|
78109 |
+
6
|
78110 |
],
|
78111 |
"bits_prop": [
|
78112 |
1
|
|
|
78167 |
}
|
78168 |
},
|
78169 |
"model.layers.1.self_attn": {
|
78170 |
+
"accuracy": 0.9952727472409606,
|
78171 |
+
"total_bits": 415201280,
|
78172 |
"q_proj": {
|
78173 |
"group_size": {
|
78174 |
+
"6": 32
|
78175 |
},
|
78176 |
"bits": [
|
78177 |
+
6
|
78178 |
],
|
78179 |
"bits_prop": [
|
78180 |
1
|
|
|
78183 |
},
|
78184 |
"k_proj": {
|
78185 |
"group_size": {
|
78186 |
+
"6": 32
|
78187 |
},
|
78188 |
"bits": [
|
78189 |
+
6
|
78190 |
],
|
78191 |
"bits_prop": [
|
78192 |
1
|
|
|
78195 |
},
|
78196 |
"v_proj": {
|
78197 |
"group_size": {
|
78198 |
+
"8": 32
|
78199 |
},
|
78200 |
"bits": [
|
78201 |
8
|
|
|
78207 |
},
|
78208 |
"o_proj": {
|
78209 |
"group_size": {
|
78210 |
+
"6": 32
|
78211 |
},
|
78212 |
"bits": [
|
78213 |
+
6
|
78214 |
],
|
78215 |
"bits_prop": [
|
78216 |
1
|
|
|
78271 |
}
|
78272 |
},
|
78273 |
"model.layers.2.self_attn": {
|
78274 |
+
"accuracy": 0.9947458985261619,
|
78275 |
+
"total_bits": 349009920,
|
78276 |
"q_proj": {
|
78277 |
"group_size": {
|
78278 |
+
"6": 32,
|
78279 |
+
"5": 32
|
78280 |
},
|
78281 |
"bits": [
|
78282 |
+
6,
|
78283 |
+
5
|
78284 |
],
|
78285 |
"bits_prop": [
|
78286 |
+
0.1,
|
78287 |
+
0.9
|
78288 |
],
|
78289 |
"scale_bits": 4
|
78290 |
},
|
78291 |
"k_proj": {
|
78292 |
"group_size": {
|
78293 |
+
"6": 32,
|
78294 |
+
"5": 32
|
78295 |
},
|
78296 |
"bits": [
|
78297 |
+
6,
|
78298 |
+
5
|
78299 |
],
|
78300 |
"bits_prop": [
|
78301 |
+
0.1,
|
78302 |
+
0.9
|
78303 |
],
|
78304 |
"scale_bits": 4
|
78305 |
},
|
78306 |
"v_proj": {
|
78307 |
"group_size": {
|
78308 |
+
"6": 32
|
78309 |
},
|
78310 |
"bits": [
|
78311 |
+
6
|
78312 |
],
|
78313 |
"bits_prop": [
|
78314 |
1
|
|
|
78317 |
},
|
78318 |
"o_proj": {
|
78319 |
"group_size": {
|
78320 |
+
"6": 32,
|
78321 |
+
"5": 32
|
78322 |
},
|
78323 |
"bits": [
|
78324 |
+
6,
|
78325 |
+
5
|
78326 |
],
|
78327 |
"bits_prop": [
|
78328 |
+
0.1,
|
78329 |
+
0.9
|
78330 |
],
|
78331 |
"scale_bits": 4
|
78332 |
}
|
78333 |
},
|
78334 |
"model.layers.2.mlp": {
|
78335 |
+
"accuracy": 0.9968946953304112,
|
78336 |
+
"total_bits": 1881221440,
|
78337 |
"gate_proj": {
|
78338 |
"group_size": {
|
78339 |
"8": 128,
|
|
|
78366 |
},
|
78367 |
"down_proj": {
|
78368 |
"group_size": {
|
78369 |
+
"8": 128
|
|
|
78370 |
},
|
78371 |
"bits": [
|
78372 |
+
8
|
|
|
78373 |
],
|
78374 |
"bits_prop": [
|
78375 |
+
1
|
|
|
78376 |
],
|
78377 |
"scale_bits": 4
|
78378 |
}
|
78379 |
},
|
78380 |
"model.layers.3.self_attn": {
|
78381 |
+
"accuracy": 0.9970399681478739,
|
78382 |
+
"total_bits": 349009920,
|
78383 |
"q_proj": {
|
78384 |
"group_size": {
|
78385 |
+
"6": 32,
|
78386 |
+
"5": 32
|
78387 |
},
|
78388 |
"bits": [
|
78389 |
6,
|
|
|
78397 |
},
|
78398 |
"k_proj": {
|
78399 |
"group_size": {
|
78400 |
+
"6": 32,
|
78401 |
+
"5": 32
|
78402 |
},
|
78403 |
"bits": [
|
78404 |
6,
|
|
|
78412 |
},
|
78413 |
"v_proj": {
|
78414 |
"group_size": {
|
78415 |
+
"6": 32
|
78416 |
},
|
78417 |
"bits": [
|
78418 |
6
|
|
|
78424 |
},
|
78425 |
"o_proj": {
|
78426 |
"group_size": {
|
78427 |
+
"6": 32,
|
78428 |
+
"5": 32
|
78429 |
},
|
78430 |
"bits": [
|
78431 |
6,
|
|
|
78662 |
}
|
78663 |
},
|
78664 |
"model.layers.5.mlp": {
|
78665 |
+
"accuracy": 0.9969794370699674,
|
78666 |
+
"total_bits": 1725245760,
|
78667 |
"gate_proj": {
|
78668 |
"group_size": {
|
78669 |
+
"8": 128,
|
78670 |
+
"6": 128
|
78671 |
},
|
78672 |
"bits": [
|
78673 |
+
8,
|
78674 |
+
6
|
78675 |
],
|
78676 |
"bits_prop": [
|
78677 |
0.1,
|
|
|
78681 |
},
|
78682 |
"up_proj": {
|
78683 |
"group_size": {
|
78684 |
+
"8": 128,
|
78685 |
+
"6": 128
|
78686 |
},
|
78687 |
"bits": [
|
78688 |
+
8,
|
78689 |
+
6
|
78690 |
],
|
78691 |
"bits_prop": [
|
78692 |
+
0.1,
|
78693 |
+
0.9
|
78694 |
],
|
78695 |
"scale_bits": 4
|
78696 |
},
|
78697 |
"down_proj": {
|
78698 |
"group_size": {
|
78699 |
+
"8": 128,
|
78700 |
+
"6": 128
|
|
|
78701 |
},
|
78702 |
"bits": [
|
78703 |
8,
|
78704 |
+
6
|
|
|
78705 |
],
|
78706 |
"bits_prop": [
|
78707 |
+
0.15,
|
|
|
78708 |
0.85
|
78709 |
],
|
78710 |
"scale_bits": 4
|
78711 |
}
|
78712 |
},
|
78713 |
"model.layers.6.self_attn": {
|
78714 |
+
"accuracy": 0.9961103489622474,
|
78715 |
+
"total_bits": 349009920,
|
78716 |
"q_proj": {
|
78717 |
"group_size": {
|
78718 |
+
"6": 32,
|
78719 |
+
"5": 32
|
78720 |
},
|
78721 |
"bits": [
|
78722 |
+
6,
|
78723 |
+
5
|
78724 |
],
|
78725 |
"bits_prop": [
|
78726 |
+
0.1,
|
78727 |
+
0.9
|
78728 |
],
|
78729 |
"scale_bits": 4
|
78730 |
},
|
78731 |
"k_proj": {
|
78732 |
"group_size": {
|
78733 |
+
"6": 32,
|
78734 |
+
"5": 32
|
78735 |
},
|
78736 |
"bits": [
|
78737 |
+
6,
|
78738 |
+
5
|
78739 |
],
|
78740 |
"bits_prop": [
|
78741 |
+
0.1,
|
78742 |
+
0.9
|
78743 |
],
|
78744 |
"scale_bits": 4
|
78745 |
},
|
78746 |
"v_proj": {
|
78747 |
"group_size": {
|
78748 |
+
"6": 32
|
78749 |
},
|
78750 |
"bits": [
|
78751 |
+
6
|
78752 |
],
|
78753 |
"bits_prop": [
|
78754 |
1
|
|
|
78757 |
},
|
78758 |
"o_proj": {
|
78759 |
"group_size": {
|
78760 |
+
"6": 32,
|
78761 |
+
"5": 32
|
78762 |
},
|
78763 |
"bits": [
|
78764 |
+
6,
|
78765 |
+
5
|
78766 |
],
|
78767 |
"bits_prop": [
|
78768 |
+
0.1,
|
78769 |
+
0.9
|
78770 |
],
|
78771 |
"scale_bits": 4
|
78772 |
}
|
|
|
78824 |
}
|
78825 |
},
|
78826 |
"model.layers.7.self_attn": {
|
78827 |
+
"accuracy": 0.9964394683483988,
|
78828 |
+
"total_bits": 395927040,
|
78829 |
"q_proj": {
|
78830 |
"group_size": {
|
78831 |
+
"6": 128
|
78832 |
},
|
78833 |
"bits": [
|
78834 |
6
|
|
|
78840 |
},
|
78841 |
"k_proj": {
|
78842 |
"group_size": {
|
78843 |
+
"6": 128
|
78844 |
},
|
78845 |
"bits": [
|
78846 |
6
|
|
|
78852 |
},
|
78853 |
"v_proj": {
|
78854 |
"group_size": {
|
78855 |
+
"6": 128
|
78856 |
},
|
78857 |
"bits": [
|
78858 |
+
6
|
78859 |
],
|
78860 |
"bits_prop": [
|
78861 |
1
|
|
|
78864 |
},
|
78865 |
"o_proj": {
|
78866 |
"group_size": {
|
78867 |
+
"6": 128
|
78868 |
},
|
78869 |
"bits": [
|
78870 |
6
|
|
|
78876 |
}
|
78877 |
},
|
78878 |
"model.layers.7.mlp": {
|
78879 |
+
"accuracy": 0.9841652419418097,
|
78880 |
+
"total_bits": 1170586928,
|
78881 |
+
"gate_proj": {
|
78882 |
+
"group_size": {
|
78883 |
+
"5": 128,
|
78884 |
+
"4": 128
|
78885 |
+
},
|
78886 |
+
"bits": [
|
78887 |
+
5,
|
78888 |
+
4
|
78889 |
+
],
|
78890 |
+
"bits_prop": [
|
78891 |
+
0.1,
|
78892 |
+
0.9
|
78893 |
+
],
|
78894 |
+
"scale_bits": 4
|
78895 |
+
},
|
78896 |
+
"up_proj": {
|
78897 |
+
"group_size": {
|
78898 |
+
"5": 128,
|
78899 |
+
"4": 128
|
78900 |
+
},
|
78901 |
+
"bits": [
|
78902 |
+
5,
|
78903 |
+
4
|
78904 |
+
],
|
78905 |
+
"bits_prop": [
|
78906 |
+
0.25,
|
78907 |
+
0.75
|
78908 |
+
],
|
78909 |
+
"scale_bits": 4
|
78910 |
+
},
|
78911 |
+
"down_proj": {
|
78912 |
+
"group_size": {
|
78913 |
+
"8": 32,
|
78914 |
+
"5": 128,
|
78915 |
+
"4": 128
|
78916 |
+
},
|
78917 |
+
"bits": [
|
78918 |
+
8,
|
78919 |
+
5,
|
78920 |
+
4
|
78921 |
+
],
|
78922 |
+
"bits_prop": [
|
78923 |
+
0.05,
|
78924 |
+
0.1,
|
78925 |
+
0.85
|
78926 |
+
],
|
78927 |
+
"scale_bits": 4
|
78928 |
+
}
|
78929 |
+
},
|
78930 |
+
"model.layers.8.self_attn": {
|
78931 |
+
"accuracy": 0.9952646759338677,
|
78932 |
+
"total_bits": 349009920,
|
78933 |
+
"q_proj": {
|
78934 |
+
"group_size": {
|
78935 |
+
"6": 32,
|
78936 |
+
"5": 32
|
78937 |
+
},
|
78938 |
+
"bits": [
|
78939 |
+
6,
|
78940 |
+
5
|
78941 |
+
],
|
78942 |
+
"bits_prop": [
|
78943 |
+
0.1,
|
78944 |
+
0.9
|
78945 |
+
],
|
78946 |
+
"scale_bits": 4
|
78947 |
+
},
|
78948 |
+
"k_proj": {
|
78949 |
+
"group_size": {
|
78950 |
+
"6": 32,
|
78951 |
+
"5": 32
|
78952 |
+
},
|
78953 |
+
"bits": [
|
78954 |
+
6,
|
78955 |
+
5
|
78956 |
+
],
|
78957 |
+
"bits_prop": [
|
78958 |
+
0.1,
|
78959 |
+
0.9
|
78960 |
+
],
|
78961 |
+
"scale_bits": 4
|
78962 |
+
},
|
78963 |
+
"v_proj": {
|
78964 |
+
"group_size": {
|
78965 |
+
"6": 32
|
78966 |
+
},
|
78967 |
+
"bits": [
|
78968 |
+
6
|
78969 |
+
],
|
78970 |
+
"bits_prop": [
|
78971 |
+
1
|
78972 |
+
],
|
78973 |
+
"scale_bits": 4
|
78974 |
+
},
|
78975 |
+
"o_proj": {
|
78976 |
+
"group_size": {
|
78977 |
+
"6": 32,
|
78978 |
+
"5": 32
|
78979 |
+
},
|
78980 |
+
"bits": [
|
78981 |
+
6,
|
78982 |
+
5
|
78983 |
+
],
|
78984 |
+
"bits_prop": [
|
78985 |
+
0.1,
|
78986 |
+
0.9
|
78987 |
+
],
|
78988 |
+
"scale_bits": 4
|
78989 |
+
}
|
78990 |
+
},
|
78991 |
+
"model.layers.8.mlp": {
|
78992 |
+
"accuracy": 0.9921151725575328,
|
78993 |
"total_bits": 1466656000,
|
78994 |
"gate_proj": {
|
78995 |
"group_size": {
|
|
|
79040 |
"scale_bits": 4
|
79041 |
}
|
79042 |
},
|
79043 |
+
"model.layers.9.self_attn": {
|
79044 |
+
"accuracy": 0.9976912483107299,
|
79045 |
+
"total_bits": 415201280,
|
79046 |
"q_proj": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79047 |
"group_size": {
|
79048 |
"6": 32
|
79049 |
},
|
|
|
79055 |
],
|
79056 |
"scale_bits": 4
|
79057 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79058 |
"k_proj": {
|
79059 |
"group_size": {
|
79060 |
+
"6": 32
|
79061 |
},
|
79062 |
"bits": [
|
79063 |
+
6
|
79064 |
],
|
79065 |
"bits_prop": [
|
79066 |
1
|
|
|
79069 |
},
|
79070 |
"v_proj": {
|
79071 |
"group_size": {
|
79072 |
+
"8": 32
|
79073 |
},
|
79074 |
"bits": [
|
79075 |
8
|
|
|
79081 |
},
|
79082 |
"o_proj": {
|
79083 |
"group_size": {
|
79084 |
+
"6": 32
|
79085 |
},
|
79086 |
"bits": [
|
79087 |
+
6
|
79088 |
],
|
79089 |
"bits_prop": [
|
79090 |
1
|
|
|
79093 |
}
|
79094 |
},
|
79095 |
"model.layers.9.mlp": {
|
79096 |
+
"accuracy": 0.995745147112757,
|
79097 |
+
"total_bits": 1881221440,
|
79098 |
"gate_proj": {
|
79099 |
"group_size": {
|
79100 |
+
"8": 128,
|
79101 |
+
"6": 128
|
79102 |
},
|
79103 |
"bits": [
|
79104 |
+
8,
|
79105 |
+
6
|
79106 |
],
|
79107 |
"bits_prop": [
|
79108 |
0.1,
|
|
|
79112 |
},
|
79113 |
"up_proj": {
|
79114 |
"group_size": {
|
79115 |
+
"8": 128,
|
79116 |
+
"6": 128
|
79117 |
},
|
79118 |
"bits": [
|
79119 |
+
8,
|
79120 |
+
6
|
79121 |
],
|
79122 |
"bits_prop": [
|
79123 |
+
0.1,
|
79124 |
+
0.9
|
79125 |
],
|
79126 |
"scale_bits": 4
|
79127 |
},
|
79128 |
"down_proj": {
|
79129 |
"group_size": {
|
79130 |
+
"8": 128
|
|
|
|
|
79131 |
},
|
79132 |
"bits": [
|
79133 |
+
8
|
|
|
|
|
79134 |
],
|
79135 |
"bits_prop": [
|
79136 |
+
1
|
|
|
|
|
79137 |
],
|
79138 |
"scale_bits": 4
|
79139 |
}
|
79140 |
},
|
79141 |
"model.layers.10.self_attn": {
|
79142 |
+
"accuracy": 0.9932247875258327,
|
79143 |
+
"total_bits": 342842880,
|
79144 |
"q_proj": {
|
79145 |
"group_size": {
|
79146 |
+
"6": 128,
|
79147 |
+
"5": 128
|
79148 |
},
|
79149 |
"bits": [
|
79150 |
6,
|
|
|
79158 |
},
|
79159 |
"k_proj": {
|
79160 |
"group_size": {
|
79161 |
+
"6": 128,
|
79162 |
+
"5": 128
|
79163 |
},
|
79164 |
"bits": [
|
79165 |
6,
|
|
|
79173 |
},
|
79174 |
"v_proj": {
|
79175 |
"group_size": {
|
79176 |
+
"6": 128
|
79177 |
},
|
79178 |
"bits": [
|
79179 |
6
|
|
|
79185 |
},
|
79186 |
"o_proj": {
|
79187 |
"group_size": {
|
79188 |
+
"6": 128,
|
79189 |
+
"5": 128
|
79190 |
},
|
79191 |
"bits": [
|
79192 |
6,
|
|
|
79200 |
}
|
79201 |
},
|
79202 |
"model.layers.10.mlp": {
|
79203 |
+
"accuracy": 0.9939747792668641,
|
79204 |
+
"total_bits": 1670626608,
|
79205 |
"gate_proj": {
|
79206 |
"group_size": {
|
79207 |
+
"6": 128
|
|
|
79208 |
},
|
79209 |
"bits": [
|
79210 |
+
6
|
|
|
79211 |
],
|
79212 |
"bits_prop": [
|
79213 |
+
1
|
|
|
79214 |
],
|
79215 |
"scale_bits": 4
|
79216 |
},
|
79217 |
"up_proj": {
|
79218 |
"group_size": {
|
79219 |
+
"6": 128
|
|
|
79220 |
},
|
79221 |
"bits": [
|
79222 |
+
6
|
|
|
79223 |
],
|
79224 |
"bits_prop": [
|
79225 |
+
1
|
|
|
79226 |
],
|
79227 |
"scale_bits": 4
|
79228 |
},
|
79229 |
"down_proj": {
|
79230 |
"group_size": {
|
79231 |
"8": 32,
|
79232 |
+
"6": 128
|
|
|
79233 |
},
|
79234 |
"bits": [
|
79235 |
8,
|
79236 |
+
6
|
|
|
79237 |
],
|
79238 |
"bits_prop": [
|
79239 |
0.05,
|
79240 |
+
0.95
|
|
|
79241 |
],
|
79242 |
"scale_bits": 4
|
79243 |
}
|
79244 |
},
|
79245 |
"model.layers.11.self_attn": {
|
79246 |
+
"accuracy": 0.99367392109707,
|
79247 |
+
"total_bits": 349009920,
|
79248 |
"q_proj": {
|
79249 |
"group_size": {
|
79250 |
+
"6": 32,
|
79251 |
+
"5": 32
|
79252 |
},
|
79253 |
"bits": [
|
79254 |
+
6,
|
79255 |
+
5
|
79256 |
],
|
79257 |
"bits_prop": [
|
79258 |
+
0.1,
|
79259 |
+
0.9
|
79260 |
],
|
79261 |
"scale_bits": 4
|
79262 |
},
|
79263 |
"k_proj": {
|
79264 |
"group_size": {
|
79265 |
+
"6": 32,
|
79266 |
+
"5": 32
|
79267 |
},
|
79268 |
"bits": [
|
79269 |
+
6,
|
79270 |
+
5
|
79271 |
],
|
79272 |
"bits_prop": [
|
79273 |
+
0.1,
|
79274 |
+
0.9
|
79275 |
],
|
79276 |
"scale_bits": 4
|
79277 |
},
|
79278 |
"v_proj": {
|
79279 |
"group_size": {
|
79280 |
+
"6": 32
|
79281 |
},
|
79282 |
"bits": [
|
79283 |
+
6
|
79284 |
],
|
79285 |
"bits_prop": [
|
79286 |
1
|
|
|
79289 |
},
|
79290 |
"o_proj": {
|
79291 |
"group_size": {
|
79292 |
+
"6": 32,
|
79293 |
+
"5": 32
|
79294 |
},
|
79295 |
"bits": [
|
79296 |
+
6,
|
79297 |
+
5
|
79298 |
],
|
79299 |
"bits_prop": [
|
79300 |
+
0.1,
|
79301 |
+
0.9
|
79302 |
],
|
79303 |
"scale_bits": 4
|
79304 |
}
|
79305 |
},
|
79306 |
"model.layers.11.mlp": {
|
79307 |
+
"accuracy": 0.9950231495313346,
|
79308 |
+
"total_bits": 1881221440,
|
79309 |
"gate_proj": {
|
79310 |
"group_size": {
|
79311 |
"8": 128,
|
|
|
79338 |
},
|
79339 |
"down_proj": {
|
79340 |
"group_size": {
|
79341 |
+
"8": 128
|
|
|
79342 |
},
|
79343 |
"bits": [
|
79344 |
+
8
|
|
|
79345 |
],
|
79346 |
"bits_prop": [
|
79347 |
+
1
|
|
|
79348 |
],
|
79349 |
"scale_bits": 4
|
79350 |
}
|
79351 |
},
|
79352 |
"model.layers.12.self_attn": {
|
79353 |
+
"accuracy": 0.9964596622157842,
|
79354 |
+
"total_bits": 415201280,
|
79355 |
"q_proj": {
|
79356 |
"group_size": {
|
79357 |
+
"6": 32
|
79358 |
},
|
79359 |
"bits": [
|
79360 |
+
6
|
79361 |
],
|
79362 |
"bits_prop": [
|
79363 |
1
|
|
|
79366 |
},
|
79367 |
"k_proj": {
|
79368 |
"group_size": {
|
79369 |
+
"6": 32
|
79370 |
},
|
79371 |
"bits": [
|
79372 |
+
6
|
79373 |
],
|
79374 |
"bits_prop": [
|
79375 |
1
|
|
|
79378 |
},
|
79379 |
"v_proj": {
|
79380 |
"group_size": {
|
79381 |
+
"8": 32
|
79382 |
},
|
79383 |
"bits": [
|
79384 |
8
|
|
|
79390 |
},
|
79391 |
"o_proj": {
|
79392 |
"group_size": {
|
79393 |
+
"6": 32
|
79394 |
},
|
79395 |
"bits": [
|
79396 |
+
6
|
79397 |
],
|
79398 |
"bits_prop": [
|
79399 |
1
|
|
|
79402 |
}
|
79403 |
},
|
79404 |
"model.layers.12.mlp": {
|
79405 |
+
"accuracy": 0.9948012055829167,
|
79406 |
+
"total_bits": 1881221440,
|
79407 |
"gate_proj": {
|
79408 |
"group_size": {
|
79409 |
"8": 128,
|
|
|
79435 |
"scale_bits": 4
|
79436 |
},
|
79437 |
"down_proj": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79438 |
"group_size": {
|
79439 |
"8": 128
|
79440 |
},
|
|
|
79447 |
"scale_bits": 4
|
79448 |
}
|
79449 |
},
|
79450 |
+
"model.layers.13.self_attn": {
|
79451 |
+
"accuracy": 0.9948677592910826,
|
79452 |
+
"total_bits": 395927040,
|
79453 |
+
"q_proj": {
|
79454 |
"group_size": {
|
79455 |
"6": 128
|
79456 |
},
|
|
|
79462 |
],
|
79463 |
"scale_bits": 4
|
79464 |
},
|
79465 |
+
"k_proj": {
|
79466 |
"group_size": {
|
79467 |
"6": 128
|
79468 |
},
|
|
|
79474 |
],
|
79475 |
"scale_bits": 4
|
79476 |
},
|
79477 |
+
"v_proj": {
|
79478 |
"group_size": {
|
|
|
79479 |
"6": 128
|
79480 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79481 |
"bits": [
|
79482 |
6
|
79483 |
],
|
|
|
79486 |
],
|
79487 |
"scale_bits": 4
|
79488 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79489 |
"o_proj": {
|
79490 |
"group_size": {
|
79491 |
+
"6": 128
|
79492 |
},
|
79493 |
"bits": [
|
79494 |
6
|
|
|
79499 |
"scale_bits": 4
|
79500 |
}
|
79501 |
},
|
79502 |
+
"model.layers.13.mlp": {
|
79503 |
+
"accuracy": 0.994574514683336,
|
79504 |
+
"total_bits": 1881221440,
|
79505 |
"gate_proj": {
|
79506 |
"group_size": {
|
79507 |
"8": 128,
|
|
|
79534 |
},
|
79535 |
"down_proj": {
|
79536 |
"group_size": {
|
79537 |
+
"8": 128
|
|
|
79538 |
},
|
79539 |
"bits": [
|
79540 |
+
8
|
|
|
79541 |
],
|
79542 |
"bits_prop": [
|
79543 |
+
1
|
|
|
79544 |
],
|
79545 |
"scale_bits": 4
|
79546 |
}
|
79547 |
},
|
79548 |
+
"model.layers.14.self_attn": {
|
79549 |
+
"accuracy": 0.9961008524987847,
|
79550 |
"total_bits": 415201280,
|
79551 |
"q_proj": {
|
79552 |
"group_size": {
|
|
|
79597 |
"scale_bits": 4
|
79598 |
}
|
79599 |
},
|
79600 |
+
"model.layers.14.mlp": {
|
79601 |
+
"accuracy": 0.9884496573358774,
|
79602 |
+
"total_bits": 1466656000,
|
79603 |
"gate_proj": {
|
79604 |
"group_size": {
|
79605 |
+
"6": 32,
|
79606 |
+
"5": 32
|
79607 |
},
|
79608 |
"bits": [
|
79609 |
+
6,
|
79610 |
+
5
|
79611 |
],
|
79612 |
"bits_prop": [
|
79613 |
+
0.1,
|
79614 |
+
0.9
|
79615 |
],
|
79616 |
"scale_bits": 4
|
79617 |
},
|
79618 |
"up_proj": {
|
79619 |
"group_size": {
|
79620 |
+
"6": 32,
|
79621 |
+
"5": 32
|
79622 |
},
|
79623 |
"bits": [
|
79624 |
+
6,
|
79625 |
+
5
|
79626 |
],
|
79627 |
"bits_prop": [
|
79628 |
+
0.25,
|
79629 |
+
0.75
|
79630 |
],
|
79631 |
"scale_bits": 4
|
79632 |
},
|
79633 |
"down_proj": {
|
79634 |
"group_size": {
|
79635 |
"8": 32,
|
79636 |
+
"6": 32,
|
79637 |
+
"5": 32
|
79638 |
},
|
79639 |
"bits": [
|
79640 |
8,
|
79641 |
+
6,
|
79642 |
+
5
|
79643 |
],
|
79644 |
"bits_prop": [
|
79645 |
0.05,
|
79646 |
+
0.1,
|
79647 |
+
0.85
|
79648 |
],
|
79649 |
"scale_bits": 4
|
79650 |
}
|
79651 |
},
|
79652 |
+
"model.layers.15.self_attn": {
|
79653 |
+
"accuracy": 0.9985494788270444,
|
79654 |
"total_bits": 526999040,
|
79655 |
"q_proj": {
|
79656 |
"group_size": {
|
|
|
79701 |
"scale_bits": 4
|
79702 |
}
|
79703 |
},
|
79704 |
+
"model.layers.15.mlp": {
|
79705 |
+
"accuracy": 0.9926823936402798,
|
79706 |
+
"total_bits": 1670626608,
|
79707 |
"gate_proj": {
|
79708 |
"group_size": {
|
|
|
79709 |
"6": 128
|
79710 |
},
|
79711 |
"bits": [
|
|
|
79712 |
6
|
79713 |
],
|
79714 |
"bits_prop": [
|
79715 |
+
1
|
|
|
79716 |
],
|
79717 |
"scale_bits": 4
|
79718 |
},
|
79719 |
"up_proj": {
|
79720 |
"group_size": {
|
|
|
79721 |
"6": 128
|
79722 |
},
|
79723 |
"bits": [
|
|
|
79724 |
6
|
79725 |
],
|
79726 |
"bits_prop": [
|
79727 |
+
1
|
|
|
79728 |
],
|
79729 |
"scale_bits": 4
|
79730 |
},
|
79731 |
"down_proj": {
|
79732 |
"group_size": {
|
79733 |
+
"8": 32,
|
79734 |
"6": 128
|
79735 |
},
|
79736 |
"bits": [
|
|
|
79738 |
6
|
79739 |
],
|
79740 |
"bits_prop": [
|
79741 |
+
0.05,
|
79742 |
+
0.95
|
79743 |
],
|
79744 |
"scale_bits": 4
|
79745 |
}
|
79746 |
},
|
79747 |
+
"model.layers.16.self_attn": {
|
79748 |
+
"accuracy": 0.9957664785906672,
|
79749 |
"total_bits": 415201280,
|
79750 |
"q_proj": {
|
79751 |
"group_size": {
|
|
|
79796 |
"scale_bits": 4
|
79797 |
}
|
79798 |
},
|
79799 |
+
"model.layers.16.mlp": {
|
79800 |
+
"accuracy": 0.9944000607356429,
|
79801 |
"total_bits": 1881221440,
|
79802 |
"gate_proj": {
|
79803 |
"group_size": {
|
|
|
79842 |
"scale_bits": 4
|
79843 |
}
|
79844 |
},
|
79845 |
+
"model.layers.17.self_attn": {
|
79846 |
+
"accuracy": 0.9957772488705814,
|
79847 |
+
"total_bits": 415201280,
|
79848 |
"q_proj": {
|
79849 |
"group_size": {
|
79850 |
+
"6": 32
|
79851 |
},
|
79852 |
"bits": [
|
79853 |
+
6
|
79854 |
],
|
79855 |
"bits_prop": [
|
79856 |
1
|
|
|
79859 |
},
|
79860 |
"k_proj": {
|
79861 |
"group_size": {
|
79862 |
+
"6": 32
|
79863 |
},
|
79864 |
"bits": [
|
79865 |
+
6
|
79866 |
],
|
79867 |
"bits_prop": [
|
79868 |
1
|
|
|
79871 |
},
|
79872 |
"v_proj": {
|
79873 |
"group_size": {
|
79874 |
+
"8": 32
|
79875 |
},
|
79876 |
"bits": [
|
79877 |
8
|
|
|
79883 |
},
|
79884 |
"o_proj": {
|
79885 |
"group_size": {
|
79886 |
+
"6": 32
|
79887 |
},
|
79888 |
"bits": [
|
79889 |
+
6
|
79890 |
],
|
79891 |
"bits_prop": [
|
79892 |
1
|
|
|
79894 |
"scale_bits": 4
|
79895 |
}
|
79896 |
},
|
79897 |
+
"model.layers.17.mlp": {
|
79898 |
+
"accuracy": 0.987975318916142,
|
79899 |
+
"total_bits": 1466656000,
|
79900 |
"gate_proj": {
|
79901 |
"group_size": {
|
79902 |
+
"6": 32,
|
79903 |
+
"5": 32
|
79904 |
},
|
79905 |
"bits": [
|
79906 |
+
6,
|
79907 |
+
5
|
79908 |
],
|
79909 |
"bits_prop": [
|
79910 |
0.1,
|
|
|
79914 |
},
|
79915 |
"up_proj": {
|
79916 |
"group_size": {
|
79917 |
+
"6": 32,
|
79918 |
+
"5": 32
|
79919 |
},
|
79920 |
"bits": [
|
79921 |
+
6,
|
79922 |
+
5
|
79923 |
],
|
79924 |
"bits_prop": [
|
79925 |
+
0.25,
|
79926 |
+
0.75
|
79927 |
],
|
79928 |
"scale_bits": 4
|
79929 |
},
|
79930 |
"down_proj": {
|
79931 |
"group_size": {
|
79932 |
+
"8": 32,
|
79933 |
+
"6": 32,
|
79934 |
+
"5": 32
|
79935 |
},
|
79936 |
"bits": [
|
79937 |
8,
|
79938 |
+
6,
|
79939 |
+
5
|
79940 |
],
|
79941 |
"bits_prop": [
|
79942 |
+
0.05,
|
79943 |
+
0.1,
|
79944 |
0.85
|
79945 |
],
|
79946 |
"scale_bits": 4
|
79947 |
}
|
79948 |
},
|
79949 |
+
"model.layers.18.self_attn": {
|
79950 |
+
"accuracy": 0.9938682704232633,
|
79951 |
+
"total_bits": 395927040,
|
79952 |
"q_proj": {
|
79953 |
"group_size": {
|
79954 |
+
"6": 128
|
79955 |
},
|
79956 |
"bits": [
|
79957 |
6
|
|
|
79963 |
},
|
79964 |
"k_proj": {
|
79965 |
"group_size": {
|
79966 |
+
"6": 128
|
79967 |
},
|
79968 |
"bits": [
|
79969 |
6
|
|
|
79975 |
},
|
79976 |
"v_proj": {
|
79977 |
"group_size": {
|
79978 |
+
"6": 128
|
79979 |
},
|
79980 |
"bits": [
|
79981 |
+
6
|
79982 |
],
|
79983 |
"bits_prop": [
|
79984 |
1
|
|
|
79987 |
},
|
79988 |
"o_proj": {
|
79989 |
"group_size": {
|
79990 |
+
"6": 128
|
79991 |
},
|
79992 |
"bits": [
|
79993 |
6
|
|
|
79998 |
"scale_bits": 4
|
79999 |
}
|
80000 |
},
|
80001 |
+
"model.layers.18.mlp": {
|
80002 |
+
"accuracy": 0.9924099026247859,
|
80003 |
"total_bits": 1670626608,
|
80004 |
"gate_proj": {
|
80005 |
"group_size": {
|
|
|
80041 |
"scale_bits": 4
|
80042 |
}
|
80043 |
},
|
80044 |
+
"model.layers.19.self_attn": {
|
80045 |
+
"accuracy": 0.9984620407922193,
|
80046 |
"total_bits": 526999040,
|
80047 |
"q_proj": {
|
80048 |
"group_size": {
|
|
|
80093 |
"scale_bits": 4
|
80094 |
}
|
80095 |
},
|
80096 |
+
"model.layers.19.mlp": {
|
80097 |
+
"accuracy": 0.9926131763495505,
|
80098 |
"total_bits": 1725245760,
|
80099 |
"gate_proj": {
|
80100 |
"group_size": {
|
|
|
80142 |
"scale_bits": 4
|
80143 |
}
|
80144 |
},
|
80145 |
+
"model.layers.20.self_attn": {
|
80146 |
+
"accuracy": 0.9922737274318933,
|
80147 |
+
"total_bits": 349009920,
|
80148 |
"q_proj": {
|
80149 |
"group_size": {
|
80150 |
+
"6": 32,
|
80151 |
+
"5": 32
|
80152 |
},
|
80153 |
"bits": [
|
80154 |
+
6,
|
80155 |
+
5
|
80156 |
],
|
80157 |
"bits_prop": [
|
80158 |
+
0.1,
|
80159 |
+
0.9
|
80160 |
],
|
80161 |
"scale_bits": 4
|
80162 |
},
|
80163 |
"k_proj": {
|
80164 |
"group_size": {
|
80165 |
+
"6": 32,
|
80166 |
+
"5": 32
|
80167 |
},
|
80168 |
"bits": [
|
80169 |
+
6,
|
80170 |
+
5
|
80171 |
],
|
80172 |
"bits_prop": [
|
80173 |
+
0.1,
|
80174 |
+
0.9
|
80175 |
],
|
80176 |
"scale_bits": 4
|
80177 |
},
|
80178 |
"v_proj": {
|
80179 |
"group_size": {
|
80180 |
+
"6": 32
|
80181 |
},
|
80182 |
"bits": [
|
80183 |
+
6
|
80184 |
],
|
80185 |
"bits_prop": [
|
80186 |
1
|
|
|
80189 |
},
|
80190 |
"o_proj": {
|
80191 |
"group_size": {
|
80192 |
+
"6": 32,
|
80193 |
+
"5": 32
|
80194 |
},
|
80195 |
"bits": [
|
80196 |
+
6,
|
80197 |
+
5
|
80198 |
],
|
80199 |
"bits_prop": [
|
80200 |
+
0.1,
|
80201 |
+
0.9
|
80202 |
],
|
80203 |
"scale_bits": 4
|
80204 |
}
|
80205 |
},
|
80206 |
+
"model.layers.20.mlp": {
|
80207 |
+
"accuracy": 0.9920412814244628,
|
80208 |
"total_bits": 1670626608,
|
80209 |
"gate_proj": {
|
80210 |
"group_size": {
|
|
|
80246 |
"scale_bits": 4
|
80247 |
}
|
80248 |
},
|
80249 |
+
"model.layers.21.self_attn": {
|
80250 |
+
"accuracy": 0.9984692038269714,
|
80251 |
"total_bits": 526999040,
|
80252 |
"q_proj": {
|
80253 |
"group_size": {
|
|
|
80298 |
"scale_bits": 4
|
80299 |
}
|
80300 |
},
|
80301 |
+
"model.layers.21.mlp": {
|
80302 |
+
"accuracy": 0.9939599921926856,
|
80303 |
+
"total_bits": 1881221440,
|
80304 |
+
"gate_proj": {
|
80305 |
+
"group_size": {
|
80306 |
+
"8": 128,
|
80307 |
+
"6": 128
|
80308 |
+
},
|
80309 |
+
"bits": [
|
80310 |
+
8,
|
80311 |
+
6
|
80312 |
+
],
|
80313 |
+
"bits_prop": [
|
80314 |
+
0.1,
|
80315 |
+
0.9
|
80316 |
+
],
|
80317 |
+
"scale_bits": 4
|
80318 |
+
},
|
80319 |
+
"up_proj": {
|
80320 |
+
"group_size": {
|
80321 |
+
"8": 128,
|
80322 |
+
"6": 128
|
80323 |
+
},
|
80324 |
+
"bits": [
|
80325 |
+
8,
|
80326 |
+
6
|
80327 |
+
],
|
80328 |
+
"bits_prop": [
|
80329 |
+
0.1,
|
80330 |
+
0.9
|
80331 |
+
],
|
80332 |
+
"scale_bits": 4
|
80333 |
+
},
|
80334 |
+
"down_proj": {
|
80335 |
+
"group_size": {
|
80336 |
+
"8": 128
|
80337 |
+
},
|
80338 |
+
"bits": [
|
80339 |
+
8
|
80340 |
+
],
|
80341 |
+
"bits_prop": [
|
80342 |
+
1
|
80343 |
+
],
|
80344 |
+
"scale_bits": 4
|
80345 |
+
}
|
80346 |
+
},
|
80347 |
+
"model.layers.22.self_attn": {
|
80348 |
+
"accuracy": 0.9961780766025186,
|
80349 |
+
"total_bits": 415201280,
|
80350 |
+
"q_proj": {
|
80351 |
+
"group_size": {
|
80352 |
+
"6": 32
|
80353 |
+
},
|
80354 |
+
"bits": [
|
80355 |
+
6
|
80356 |
+
],
|
80357 |
+
"bits_prop": [
|
80358 |
+
1
|
80359 |
+
],
|
80360 |
+
"scale_bits": 4
|
80361 |
+
},
|
80362 |
+
"k_proj": {
|
80363 |
+
"group_size": {
|
80364 |
+
"6": 32
|
80365 |
+
},
|
80366 |
+
"bits": [
|
80367 |
+
6
|
80368 |
+
],
|
80369 |
+
"bits_prop": [
|
80370 |
+
1
|
80371 |
+
],
|
80372 |
+
"scale_bits": 4
|
80373 |
+
},
|
80374 |
+
"v_proj": {
|
80375 |
+
"group_size": {
|
80376 |
+
"8": 32
|
80377 |
+
},
|
80378 |
+
"bits": [
|
80379 |
+
8
|
80380 |
+
],
|
80381 |
+
"bits_prop": [
|
80382 |
+
1
|
80383 |
+
],
|
80384 |
+
"scale_bits": 4
|
80385 |
+
},
|
80386 |
+
"o_proj": {
|
80387 |
+
"group_size": {
|
80388 |
+
"6": 32
|
80389 |
+
},
|
80390 |
+
"bits": [
|
80391 |
+
6
|
80392 |
+
],
|
80393 |
+
"bits_prop": [
|
80394 |
+
1
|
80395 |
+
],
|
80396 |
+
"scale_bits": 4
|
80397 |
+
}
|
80398 |
+
},
|
80399 |
"model.layers.22.mlp": {
|
80400 |
"accuracy": 0.9926852826029062,
|
80401 |
"total_bits": 1725245760,
|
|
|
80541 |
}
|
80542 |
},
|
80543 |
"model.layers.24.self_attn": {
|
80544 |
+
"accuracy": 0.9965322115458548,
|
80545 |
+
"total_bits": 415201280,
|
80546 |
"q_proj": {
|
80547 |
"group_size": {
|
80548 |
+
"6": 32
|
80549 |
},
|
80550 |
"bits": [
|
80551 |
+
6
|
80552 |
],
|
80553 |
"bits_prop": [
|
80554 |
1
|
|
|
80557 |
},
|
80558 |
"k_proj": {
|
80559 |
"group_size": {
|
80560 |
+
"6": 32
|
80561 |
},
|
80562 |
"bits": [
|
80563 |
+
6
|
80564 |
],
|
80565 |
"bits_prop": [
|
80566 |
1
|
|
|
80569 |
},
|
80570 |
"v_proj": {
|
80571 |
"group_size": {
|
80572 |
+
"8": 32
|
80573 |
},
|
80574 |
"bits": [
|
80575 |
8
|
|
|
80581 |
},
|
80582 |
"o_proj": {
|
80583 |
"group_size": {
|
80584 |
+
"6": 32
|
80585 |
},
|
80586 |
"bits": [
|
80587 |
+
6
|
80588 |
],
|
80589 |
"bits_prop": [
|
80590 |
1
|
|
|
80636 |
}
|
80637 |
},
|
80638 |
"model.layers.25.self_attn": {
|
80639 |
+
"accuracy": 0.9966988901142031,
|
80640 |
+
"total_bits": 415201280,
|
80641 |
"q_proj": {
|
80642 |
"group_size": {
|
80643 |
+
"6": 32
|
80644 |
},
|
80645 |
"bits": [
|
80646 |
+
6
|
80647 |
],
|
80648 |
"bits_prop": [
|
80649 |
1
|
|
|
80652 |
},
|
80653 |
"k_proj": {
|
80654 |
"group_size": {
|
80655 |
+
"6": 32
|
80656 |
},
|
80657 |
"bits": [
|
80658 |
+
6
|
80659 |
],
|
80660 |
"bits_prop": [
|
80661 |
1
|
|
|
80664 |
},
|
80665 |
"v_proj": {
|
80666 |
"group_size": {
|
80667 |
+
"8": 32
|
80668 |
},
|
80669 |
"bits": [
|
80670 |
8
|
|
|
80675 |
"scale_bits": 4
|
80676 |
},
|
80677 |
"o_proj": {
|
80678 |
+
"group_size": {
|
80679 |
+
"6": 32
|
80680 |
+
},
|
80681 |
+
"bits": [
|
80682 |
+
6
|
80683 |
+
],
|
80684 |
+
"bits_prop": [
|
80685 |
+
1
|
80686 |
+
],
|
80687 |
+
"scale_bits": 4
|
80688 |
+
}
|
80689 |
+
},
|
80690 |
+
"model.layers.25.mlp": {
|
80691 |
+
"accuracy": 0.9943082556128502,
|
80692 |
+
"total_bits": 1881221440,
|
80693 |
+
"gate_proj": {
|
80694 |
+
"group_size": {
|
80695 |
+
"8": 128,
|
80696 |
+
"6": 128
|
80697 |
+
},
|
80698 |
+
"bits": [
|
80699 |
+
8,
|
80700 |
+
6
|
80701 |
+
],
|
80702 |
+
"bits_prop": [
|
80703 |
+
0.1,
|
80704 |
+
0.9
|
80705 |
+
],
|
80706 |
+
"scale_bits": 4
|
80707 |
+
},
|
80708 |
+
"up_proj": {
|
80709 |
+
"group_size": {
|
80710 |
+
"8": 128,
|
80711 |
+
"6": 128
|
80712 |
+
},
|
80713 |
+
"bits": [
|
80714 |
+
8,
|
80715 |
+
6
|
80716 |
+
],
|
80717 |
+
"bits_prop": [
|
80718 |
+
0.1,
|
80719 |
+
0.9
|
80720 |
+
],
|
80721 |
+
"scale_bits": 4
|
80722 |
+
},
|
80723 |
+
"down_proj": {
|
80724 |
"group_size": {
|
80725 |
"8": 128
|
80726 |
},
|
|
|
80733 |
"scale_bits": 4
|
80734 |
}
|
80735 |
},
|
80736 |
+
"model.layers.26.self_attn": {
|
80737 |
+
"accuracy": 0.9938132991082966,
|
80738 |
+
"total_bits": 349009920,
|
80739 |
+
"q_proj": {
|
80740 |
+
"group_size": {
|
80741 |
+
"6": 32,
|
80742 |
+
"5": 32
|
80743 |
+
},
|
80744 |
+
"bits": [
|
80745 |
+
6,
|
80746 |
+
5
|
80747 |
+
],
|
80748 |
+
"bits_prop": [
|
80749 |
+
0.1,
|
80750 |
+
0.9
|
80751 |
+
],
|
80752 |
+
"scale_bits": 4
|
80753 |
+
},
|
80754 |
+
"k_proj": {
|
80755 |
+
"group_size": {
|
80756 |
+
"6": 32,
|
80757 |
+
"5": 32
|
80758 |
+
},
|
80759 |
+
"bits": [
|
80760 |
+
6,
|
80761 |
+
5
|
80762 |
+
],
|
80763 |
+
"bits_prop": [
|
80764 |
+
0.1,
|
80765 |
+
0.9
|
80766 |
+
],
|
80767 |
+
"scale_bits": 4
|
80768 |
+
},
|
80769 |
+
"v_proj": {
|
80770 |
+
"group_size": {
|
80771 |
+
"6": 32
|
80772 |
+
},
|
80773 |
+
"bits": [
|
80774 |
+
6
|
80775 |
+
],
|
80776 |
+
"bits_prop": [
|
80777 |
+
1
|
80778 |
+
],
|
80779 |
+
"scale_bits": 4
|
80780 |
+
},
|
80781 |
+
"o_proj": {
|
80782 |
+
"group_size": {
|
80783 |
+
"6": 32,
|
80784 |
+
"5": 32
|
80785 |
+
},
|
80786 |
+
"bits": [
|
80787 |
+
6,
|
80788 |
+
5
|
80789 |
+
],
|
80790 |
+
"bits_prop": [
|
80791 |
+
0.1,
|
80792 |
+
0.9
|
80793 |
+
],
|
80794 |
+
"scale_bits": 4
|
80795 |
+
}
|
80796 |
+
},
|
80797 |
+
"model.layers.26.mlp": {
|
80798 |
+
"accuracy": 0.9929717490449548,
|
80799 |
"total_bits": 1670626608,
|
80800 |
"gate_proj": {
|
80801 |
"group_size": {
|
|
|
80837 |
"scale_bits": 4
|
80838 |
}
|
80839 |
},
|
80840 |
+
"model.layers.27.self_attn": {
|
80841 |
+
"accuracy": 0.9940781220793724,
|
80842 |
+
"total_bits": 349009920,
|
80843 |
"q_proj": {
|
80844 |
"group_size": {
|
80845 |
+
"6": 32,
|
80846 |
+
"5": 32
|
80847 |
},
|
80848 |
"bits": [
|
80849 |
+
6,
|
80850 |
+
5
|
80851 |
],
|
80852 |
"bits_prop": [
|
80853 |
+
0.1,
|
80854 |
+
0.9
|
80855 |
],
|
80856 |
"scale_bits": 4
|
80857 |
},
|
80858 |
"k_proj": {
|
80859 |
"group_size": {
|
80860 |
+
"6": 32,
|
80861 |
+
"5": 32
|
80862 |
},
|
80863 |
"bits": [
|
80864 |
+
6,
|
80865 |
+
5
|
80866 |
],
|
80867 |
"bits_prop": [
|
80868 |
+
0.1,
|
80869 |
+
0.9
|
80870 |
],
|
80871 |
"scale_bits": 4
|
80872 |
},
|
80873 |
"v_proj": {
|
80874 |
"group_size": {
|
80875 |
+
"6": 32
|
80876 |
},
|
80877 |
"bits": [
|
80878 |
+
6
|
80879 |
],
|
80880 |
"bits_prop": [
|
80881 |
1
|
|
|
80884 |
},
|
80885 |
"o_proj": {
|
80886 |
"group_size": {
|
80887 |
+
"6": 32,
|
80888 |
+
"5": 32
|
80889 |
},
|
80890 |
"bits": [
|
80891 |
+
6,
|
80892 |
+
5
|
80893 |
],
|
80894 |
"bits_prop": [
|
80895 |
+
0.1,
|
80896 |
+
0.9
|
80897 |
],
|
80898 |
"scale_bits": 4
|
80899 |
}
|
80900 |
},
|
80901 |
+
"model.layers.27.mlp": {
|
80902 |
+
"accuracy": 0.9933040356263518,
|
80903 |
"total_bits": 1670626608,
|
80904 |
"gate_proj": {
|
80905 |
"group_size": {
|
|
|
80941 |
"scale_bits": 4
|
80942 |
}
|
80943 |
},
|
80944 |
+
"model.layers.28.self_attn": {
|
80945 |
+
"accuracy": 0.9957931926473975,
|
80946 |
+
"total_bits": 395927040,
|
80947 |
"q_proj": {
|
80948 |
"group_size": {
|
80949 |
+
"6": 128
|
80950 |
},
|
80951 |
"bits": [
|
80952 |
6
|
|
|
80958 |
},
|
80959 |
"k_proj": {
|
80960 |
"group_size": {
|
80961 |
+
"6": 128
|
80962 |
},
|
80963 |
"bits": [
|
80964 |
6
|
|
|
80970 |
},
|
80971 |
"v_proj": {
|
80972 |
"group_size": {
|
80973 |
+
"6": 128
|
80974 |
},
|
80975 |
"bits": [
|
80976 |
+
6
|
80977 |
],
|
80978 |
"bits_prop": [
|
80979 |
1
|
|
|
80982 |
},
|
80983 |
"o_proj": {
|
80984 |
"group_size": {
|
80985 |
+
"6": 128
|
80986 |
},
|
80987 |
"bits": [
|
80988 |
6
|
|
|
80993 |
"scale_bits": 4
|
80994 |
}
|
80995 |
},
|
80996 |
+
"model.layers.28.mlp": {
|
80997 |
+
"accuracy": 0.9936611945740879,
|
80998 |
+
"total_bits": 1670626608,
|
80999 |
"gate_proj": {
|
81000 |
"group_size": {
|
|
|
81001 |
"6": 128
|
81002 |
},
|
81003 |
"bits": [
|
|
|
81004 |
6
|
81005 |
],
|
81006 |
"bits_prop": [
|
81007 |
+
1
|
|
|
81008 |
],
|
81009 |
"scale_bits": 4
|
81010 |
},
|
81011 |
"up_proj": {
|
81012 |
"group_size": {
|
|
|
81013 |
"6": 128
|
81014 |
},
|
81015 |
"bits": [
|
|
|
81016 |
6
|
81017 |
],
|
81018 |
"bits_prop": [
|
81019 |
+
1
|
|
|
81020 |
],
|
81021 |
"scale_bits": 4
|
81022 |
},
|
81023 |
"down_proj": {
|
81024 |
"group_size": {
|
81025 |
+
"8": 32,
|
81026 |
+
"6": 128
|
81027 |
},
|
81028 |
"bits": [
|
81029 |
+
8,
|
81030 |
+
6
|
81031 |
],
|
81032 |
"bits_prop": [
|
81033 |
+
0.05,
|
81034 |
+
0.95
|
81035 |
],
|
81036 |
"scale_bits": 4
|
81037 |
}
|
81038 |
},
|
81039 |
+
"model.layers.29.self_attn": {
|
81040 |
+
"accuracy": 0.997671986464411,
|
81041 |
"total_bits": 415201280,
|
81042 |
"q_proj": {
|
81043 |
"group_size": {
|
|
|
81088 |
"scale_bits": 4
|
81089 |
}
|
81090 |
},
|
81091 |
+
"model.layers.29.mlp": {
|
81092 |
+
"accuracy": 0.9884004453197122,
|
81093 |
+
"total_bits": 1441250608,
|
81094 |
"gate_proj": {
|
81095 |
"group_size": {
|
81096 |
+
"6": 128,
|
81097 |
+
"5": 128
|
81098 |
},
|
81099 |
"bits": [
|
81100 |
+
6,
|
81101 |
+
5
|
81102 |
],
|
81103 |
"bits_prop": [
|
81104 |
+
0.1,
|
81105 |
+
0.9
|
81106 |
],
|
81107 |
"scale_bits": 4
|
81108 |
},
|
81109 |
"up_proj": {
|
81110 |
"group_size": {
|
81111 |
+
"6": 128,
|
81112 |
+
"5": 128
|
81113 |
},
|
81114 |
"bits": [
|
81115 |
+
6,
|
81116 |
+
5
|
81117 |
],
|
81118 |
"bits_prop": [
|
81119 |
+
0.25,
|
81120 |
+
0.75
|
81121 |
],
|
81122 |
"scale_bits": 4
|
81123 |
},
|
81124 |
"down_proj": {
|
81125 |
"group_size": {
|
81126 |
"8": 32,
|
81127 |
+
"6": 128,
|
81128 |
+
"5": 128
|
81129 |
},
|
81130 |
"bits": [
|
81131 |
8,
|
81132 |
+
6,
|
81133 |
+
5
|
81134 |
],
|
81135 |
"bits_prop": [
|
81136 |
0.05,
|
81137 |
+
0.1,
|
81138 |
+
0.85
|
81139 |
],
|
81140 |
"scale_bits": 4
|
81141 |
}
|
81142 |
},
|
81143 |
+
"model.layers.30.self_attn": {
|
81144 |
+
"accuracy": 0.9975796616636217,
|
81145 |
"total_bits": 415201280,
|
81146 |
"q_proj": {
|
81147 |
"group_size": {
|
|
|
81192 |
"scale_bits": 4
|
81193 |
}
|
81194 |
},
|
81195 |
+
"model.layers.30.mlp": {
|
81196 |
+
"accuracy": 0.9939606585539877,
|
81197 |
+
"total_bits": 1670626608,
|
81198 |
"gate_proj": {
|
81199 |
"group_size": {
|
81200 |
+
"6": 128
|
|
|
81201 |
},
|
81202 |
"bits": [
|
81203 |
+
6
|
|
|
81204 |
],
|
81205 |
"bits_prop": [
|
81206 |
+
1
|
|
|
81207 |
],
|
81208 |
"scale_bits": 4
|
81209 |
},
|
81210 |
"up_proj": {
|
81211 |
"group_size": {
|
81212 |
+
"6": 128
|
|
|
81213 |
},
|
81214 |
"bits": [
|
81215 |
+
6
|
|
|
81216 |
],
|
81217 |
"bits_prop": [
|
81218 |
+
1
|
|
|
81219 |
],
|
81220 |
"scale_bits": 4
|
81221 |
},
|
81222 |
"down_proj": {
|
81223 |
"group_size": {
|
81224 |
"8": 32,
|
81225 |
+
"6": 128
|
|
|
81226 |
},
|
81227 |
"bits": [
|
81228 |
8,
|
81229 |
+
6
|
|
|
81230 |
],
|
81231 |
"bits_prop": [
|
81232 |
0.05,
|
81233 |
+
0.95
|
|
|
81234 |
],
|
81235 |
"scale_bits": 4
|
81236 |
}
|
81237 |
},
|
81238 |
+
"model.layers.31.self_attn": {
|
81239 |
+
"accuracy": 0.997897554654628,
|
81240 |
+
"total_bits": 415201280,
|
81241 |
"q_proj": {
|
81242 |
"group_size": {
|
81243 |
+
"6": 32
|
81244 |
},
|
81245 |
"bits": [
|
81246 |
6
|
|
|
81252 |
},
|
81253 |
"k_proj": {
|
81254 |
"group_size": {
|
81255 |
+
"6": 32
|
81256 |
},
|
81257 |
"bits": [
|
81258 |
6
|
|
|
81264 |
},
|
81265 |
"v_proj": {
|
81266 |
"group_size": {
|
81267 |
+
"8": 32
|
81268 |
},
|
81269 |
"bits": [
|
81270 |
+
8
|
81271 |
],
|
81272 |
"bits_prop": [
|
81273 |
1
|
|
|
81276 |
},
|
81277 |
"o_proj": {
|
81278 |
"group_size": {
|
81279 |
+
"6": 32
|
81280 |
},
|
81281 |
"bits": [
|
81282 |
6
|
|
|
81287 |
"scale_bits": 4
|
81288 |
}
|
81289 |
},
|
81290 |
+
"model.layers.31.mlp": {
|
81291 |
+
"accuracy": 0.9940744298510253,
|
81292 |
+
"total_bits": 1670626608,
|
81293 |
"gate_proj": {
|
81294 |
"group_size": {
|
81295 |
+
"6": 128
|
|
|
81296 |
},
|
81297 |
"bits": [
|
81298 |
+
6
|
|
|
81299 |
],
|
81300 |
"bits_prop": [
|
81301 |
+
1
|
|
|
81302 |
],
|
81303 |
"scale_bits": 4
|
81304 |
},
|
81305 |
"up_proj": {
|
81306 |
"group_size": {
|
81307 |
+
"6": 128
|
|
|
81308 |
},
|
81309 |
"bits": [
|
81310 |
+
6
|
|
|
81311 |
],
|
81312 |
"bits_prop": [
|
81313 |
+
1
|
|
|
81314 |
],
|
81315 |
"scale_bits": 4
|
81316 |
},
|
81317 |
"down_proj": {
|
81318 |
"group_size": {
|
81319 |
"8": 32,
|
81320 |
+
"6": 128
|
|
|
81321 |
},
|
81322 |
"bits": [
|
81323 |
8,
|
81324 |
+
6
|
|
|
81325 |
],
|
81326 |
"bits_prop": [
|
81327 |
0.05,
|
81328 |
+
0.95
|
|
|
81329 |
],
|
81330 |
"scale_bits": 4
|
81331 |
}
|
81332 |
},
|
81333 |
+
"model.layers.32.self_attn": {
|
81334 |
+
"accuracy": 0.9947752063162625,
|
81335 |
+
"total_bits": 342842880,
|
81336 |
"q_proj": {
|
81337 |
"group_size": {
|
81338 |
+
"6": 128,
|
81339 |
+
"5": 128
|
81340 |
},
|
81341 |
"bits": [
|
81342 |
+
6,
|
81343 |
+
5
|
81344 |
],
|
81345 |
"bits_prop": [
|
81346 |
+
0.1,
|
81347 |
+
0.9
|
81348 |
],
|
81349 |
"scale_bits": 4
|
81350 |
},
|
81351 |
"k_proj": {
|
81352 |
"group_size": {
|
81353 |
+
"6": 128,
|
81354 |
+
"5": 128
|
81355 |
},
|
81356 |
"bits": [
|
81357 |
+
6,
|
81358 |
+
5
|
81359 |
],
|
81360 |
"bits_prop": [
|
81361 |
+
0.1,
|
81362 |
+
0.9
|
81363 |
],
|
81364 |
"scale_bits": 4
|
81365 |
},
|
81366 |
"v_proj": {
|
81367 |
"group_size": {
|
81368 |
+
"6": 128
|
81369 |
},
|
81370 |
"bits": [
|
81371 |
+
6
|
81372 |
],
|
81373 |
"bits_prop": [
|
81374 |
1
|
|
|
81377 |
},
|
81378 |
"o_proj": {
|
81379 |
"group_size": {
|
81380 |
+
"6": 128,
|
81381 |
+
"5": 128
|
81382 |
},
|
81383 |
"bits": [
|
81384 |
+
6,
|
81385 |
+
5
|
81386 |
],
|
81387 |
"bits_prop": [
|
81388 |
+
0.1,
|
81389 |
+
0.9
|
81390 |
],
|
81391 |
"scale_bits": 4
|
81392 |
}
|
81393 |
},
|
81394 |
+
"model.layers.32.mlp": {
|
81395 |
+
"accuracy": 0.9891814850270748,
|
81396 |
+
"total_bits": 1441250608,
|
81397 |
"gate_proj": {
|
81398 |
"group_size": {
|
81399 |
+
"6": 128,
|
81400 |
+
"5": 128
|
81401 |
},
|
81402 |
"bits": [
|
81403 |
+
6,
|
81404 |
+
5
|
81405 |
],
|
81406 |
"bits_prop": [
|
81407 |
+
0.1,
|
81408 |
+
0.9
|
81409 |
],
|
81410 |
"scale_bits": 4
|
81411 |
},
|
81412 |
"up_proj": {
|
81413 |
"group_size": {
|
81414 |
+
"6": 128,
|
81415 |
+
"5": 128
|
81416 |
},
|
81417 |
"bits": [
|
81418 |
+
6,
|
81419 |
+
5
|
81420 |
],
|
81421 |
"bits_prop": [
|
81422 |
+
0.25,
|
81423 |
+
0.75
|
81424 |
],
|
81425 |
"scale_bits": 4
|
81426 |
},
|
81427 |
"down_proj": {
|
81428 |
"group_size": {
|
81429 |
"8": 32,
|
81430 |
+
"6": 128,
|
81431 |
+
"5": 128
|
81432 |
},
|
81433 |
"bits": [
|
81434 |
8,
|
81435 |
+
6,
|
81436 |
+
5
|
81437 |
],
|
81438 |
"bits_prop": [
|
81439 |
0.05,
|
81440 |
+
0.1,
|
81441 |
+
0.85
|
81442 |
],
|
81443 |
"scale_bits": 4
|
81444 |
}
|
81445 |
},
|
81446 |
+
"model.layers.33.self_attn": {
|
81447 |
+
"accuracy": 0.999251619039569,
|
81448 |
"total_bits": 526999040,
|
81449 |
"q_proj": {
|
81450 |
"group_size": {
|
|
|
81495 |
"scale_bits": 4
|
81496 |
}
|
81497 |
},
|
81498 |
+
"model.layers.33.mlp": {
|
81499 |
+
"accuracy": 0.9945772625505924,
|
81500 |
+
"total_bits": 1725245760,
|
81501 |
"gate_proj": {
|
81502 |
"group_size": {
|
81503 |
+
"8": 128,
|
81504 |
"6": 128
|
81505 |
},
|
81506 |
"bits": [
|
81507 |
+
8,
|
81508 |
6
|
81509 |
],
|
81510 |
"bits_prop": [
|
81511 |
+
0.1,
|
81512 |
+
0.9
|
81513 |
],
|
81514 |
"scale_bits": 4
|
81515 |
},
|
81516 |
"up_proj": {
|
81517 |
"group_size": {
|
81518 |
+
"8": 128,
|
81519 |
"6": 128
|
81520 |
},
|
81521 |
"bits": [
|
81522 |
+
8,
|
81523 |
6
|
81524 |
],
|
81525 |
"bits_prop": [
|
81526 |
+
0.1,
|
81527 |
+
0.9
|
81528 |
],
|
81529 |
"scale_bits": 4
|
81530 |
},
|
81531 |
"down_proj": {
|
81532 |
"group_size": {
|
81533 |
+
"8": 128,
|
81534 |
"6": 128
|
81535 |
},
|
81536 |
"bits": [
|
|
|
81538 |
6
|
81539 |
],
|
81540 |
"bits_prop": [
|
81541 |
+
0.15,
|
81542 |
+
0.85
|
81543 |
],
|
81544 |
"scale_bits": 4
|
81545 |
}
|
81546 |
},
|
81547 |
+
"model.layers.34.self_attn": {
|
81548 |
+
"accuracy": 0.9960626754909754,
|
81549 |
+
"total_bits": 349009920,
|
81550 |
"q_proj": {
|
81551 |
"group_size": {
|
81552 |
+
"6": 32,
|
81553 |
+
"5": 32
|
81554 |
},
|
81555 |
"bits": [
|
81556 |
+
6,
|
81557 |
+
5
|
81558 |
],
|
81559 |
"bits_prop": [
|
81560 |
+
0.1,
|
81561 |
+
0.9
|
81562 |
],
|
81563 |
"scale_bits": 4
|
81564 |
},
|
81565 |
"k_proj": {
|
81566 |
"group_size": {
|
81567 |
+
"6": 32,
|
81568 |
+
"5": 32
|
81569 |
},
|
81570 |
"bits": [
|
81571 |
+
6,
|
81572 |
+
5
|
81573 |
],
|
81574 |
"bits_prop": [
|
81575 |
+
0.1,
|
81576 |
+
0.9
|
81577 |
],
|
81578 |
"scale_bits": 4
|
81579 |
},
|
81580 |
"v_proj": {
|
81581 |
"group_size": {
|
81582 |
+
"6": 32
|
81583 |
},
|
81584 |
"bits": [
|
81585 |
+
6
|
81586 |
],
|
81587 |
"bits_prop": [
|
81588 |
1
|
|
|
81591 |
},
|
81592 |
"o_proj": {
|
81593 |
"group_size": {
|
81594 |
+
"6": 32,
|
81595 |
+
"5": 32
|
81596 |
},
|
81597 |
"bits": [
|
81598 |
+
6,
|
81599 |
+
5
|
81600 |
],
|
81601 |
"bits_prop": [
|
81602 |
+
0.1,
|
81603 |
+
0.9
|
81604 |
],
|
81605 |
"scale_bits": 4
|
81606 |
}
|
81607 |
},
|
81608 |
+
"model.layers.34.mlp": {
|
81609 |
+
"accuracy": 0.9943122416734695,
|
81610 |
+
"total_bits": 1670626608,
|
81611 |
"gate_proj": {
|
81612 |
"group_size": {
|
81613 |
+
"6": 128
|
|
|
81614 |
},
|
81615 |
"bits": [
|
81616 |
+
6
|
|
|
81617 |
],
|
81618 |
"bits_prop": [
|
81619 |
+
1
|
|
|
81620 |
],
|
81621 |
"scale_bits": 4
|
81622 |
},
|
81623 |
"up_proj": {
|
81624 |
"group_size": {
|
81625 |
+
"6": 128
|
|
|
81626 |
},
|
81627 |
"bits": [
|
81628 |
+
6
|
|
|
81629 |
],
|
81630 |
"bits_prop": [
|
81631 |
+
1
|
|
|
81632 |
],
|
81633 |
"scale_bits": 4
|
81634 |
},
|
81635 |
"down_proj": {
|
81636 |
"group_size": {
|
81637 |
"8": 32,
|
81638 |
+
"6": 128
|
|
|
81639 |
},
|
81640 |
"bits": [
|
81641 |
8,
|
81642 |
+
6
|
|
|
81643 |
],
|
81644 |
"bits_prop": [
|
81645 |
0.05,
|
81646 |
+
0.95
|
|
|
81647 |
],
|
81648 |
"scale_bits": 4
|
81649 |
}
|
81650 |
},
|
81651 |
+
"model.layers.35.self_attn": {
|
81652 |
+
"accuracy": 0.9989852329017594,
|
81653 |
+
"total_bits": 526999040,
|
81654 |
"q_proj": {
|
81655 |
"group_size": {
|
81656 |
+
"8": 128
|
|
|
81657 |
},
|
81658 |
"bits": [
|
81659 |
+
8
|
|
|
81660 |
],
|
81661 |
"bits_prop": [
|
81662 |
+
1
|
|
|
81663 |
],
|
81664 |
"scale_bits": 4
|
81665 |
},
|
81666 |
"k_proj": {
|
81667 |
"group_size": {
|
81668 |
+
"8": 128
|
|
|
81669 |
},
|
81670 |
"bits": [
|
81671 |
+
8
|
|
|
81672 |
],
|
81673 |
"bits_prop": [
|
81674 |
+
1
|
|
|
81675 |
],
|
81676 |
"scale_bits": 4
|
81677 |
},
|
81678 |
"v_proj": {
|
81679 |
"group_size": {
|
81680 |
+
"8": 128
|
81681 |
},
|
81682 |
"bits": [
|
81683 |
+
8
|
81684 |
],
|
81685 |
"bits_prop": [
|
81686 |
1
|
|
|
81689 |
},
|
81690 |
"o_proj": {
|
81691 |
"group_size": {
|
81692 |
+
"8": 128
|
|
|
81693 |
},
|
81694 |
"bits": [
|
81695 |
+
8
|
|
|
81696 |
],
|
81697 |
"bits_prop": [
|
81698 |
+
1
|
|
|
81699 |
],
|
81700 |
"scale_bits": 4
|
81701 |
}
|
81702 |
},
|
81703 |
+
"model.layers.35.mlp": {
|
81704 |
+
"accuracy": 0.9912119247019291,
|
81705 |
+
"total_bits": 1466656000,
|
81706 |
"gate_proj": {
|
81707 |
"group_size": {
|
81708 |
+
"6": 32,
|
81709 |
+
"5": 32
|
81710 |
},
|
81711 |
"bits": [
|
81712 |
6,
|
|
|
81720 |
},
|
81721 |
"up_proj": {
|
81722 |
"group_size": {
|
81723 |
+
"6": 32,
|
81724 |
+
"5": 32
|
81725 |
},
|
81726 |
"bits": [
|
81727 |
6,
|
|
|
81736 |
"down_proj": {
|
81737 |
"group_size": {
|
81738 |
"8": 32,
|
81739 |
+
"6": 32,
|
81740 |
+
"5": 32
|
81741 |
},
|
81742 |
"bits": [
|
81743 |
8,
|
|
|
81752 |
"scale_bits": 4
|
81753 |
}
|
81754 |
},
|
81755 |
+
"model.layers.36.self_attn": {
|
81756 |
+
"accuracy": 0.9965386835392565,
|
81757 |
"total_bits": 349009920,
|
81758 |
"q_proj": {
|
81759 |
"group_size": {
|
|
|
81813 |
"scale_bits": 4
|
81814 |
}
|
81815 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81816 |
"model.layers.36.mlp": {
|
81817 |
+
"accuracy": 0.9901178376749158,
|
81818 |
+
"total_bits": 1441250608,
|
81819 |
"gate_proj": {
|
81820 |
"group_size": {
|
81821 |
+
"6": 128,
|
81822 |
+
"5": 128
|
81823 |
},
|
81824 |
"bits": [
|
81825 |
6,
|
|
|
81833 |
},
|
81834 |
"up_proj": {
|
81835 |
"group_size": {
|
81836 |
+
"6": 128,
|
81837 |
+
"5": 128
|
81838 |
},
|
81839 |
"bits": [
|
81840 |
6,
|
|
|
81849 |
"down_proj": {
|
81850 |
"group_size": {
|
81851 |
"8": 32,
|
81852 |
+
"6": 128,
|
81853 |
+
"5": 128
|
81854 |
},
|
81855 |
"bits": [
|
81856 |
8,
|
|
|
81927 |
}
|
81928 |
},
|
81929 |
"model.layers.37.mlp": {
|
81930 |
+
"accuracy": 0.9958977806381881,
|
81931 |
+
"total_bits": 1881221440,
|
81932 |
"gate_proj": {
|
81933 |
"group_size": {
|
81934 |
"8": 128,
|
|
|
81961 |
},
|
81962 |
"down_proj": {
|
81963 |
"group_size": {
|
81964 |
+
"8": 128
|
|
|
81965 |
},
|
81966 |
"bits": [
|
81967 |
+
8
|
|
|
81968 |
],
|
81969 |
"bits_prop": [
|
81970 |
+
1
|
|
|
81971 |
],
|
81972 |
"scale_bits": 4
|
81973 |
}
|
81974 |
},
|
81975 |
"model.layers.38.self_attn": {
|
81976 |
+
"accuracy": 0.9993991161463782,
|
81977 |
+
"total_bits": 526999040,
|
81978 |
"q_proj": {
|
81979 |
"group_size": {
|
81980 |
+
"8": 128
|
81981 |
},
|
81982 |
"bits": [
|
81983 |
+
8
|
81984 |
],
|
81985 |
"bits_prop": [
|
81986 |
1
|
|
|
81989 |
},
|
81990 |
"k_proj": {
|
81991 |
"group_size": {
|
81992 |
+
"8": 128
|
81993 |
},
|
81994 |
"bits": [
|
81995 |
+
8
|
81996 |
],
|
81997 |
"bits_prop": [
|
81998 |
1
|
|
|
82001 |
},
|
82002 |
"v_proj": {
|
82003 |
"group_size": {
|
82004 |
+
"8": 128
|
82005 |
},
|
82006 |
"bits": [
|
82007 |
8
|
|
|
82013 |
},
|
82014 |
"o_proj": {
|
82015 |
"group_size": {
|
82016 |
+
"8": 128
|
82017 |
},
|
82018 |
"bits": [
|
82019 |
+
8
|
82020 |
],
|
82021 |
"bits_prop": [
|
82022 |
1
|
|
|
82025 |
}
|
82026 |
},
|
82027 |
"model.layers.38.mlp": {
|
82028 |
+
"accuracy": 0.9934721081517637,
|
82029 |
+
"total_bits": 1441250608,
|
82030 |
"gate_proj": {
|
82031 |
"group_size": {
|
82032 |
+
"6": 128,
|
82033 |
+
"5": 128
|
82034 |
},
|
82035 |
"bits": [
|
82036 |
6,
|
|
|
82044 |
},
|
82045 |
"up_proj": {
|
82046 |
"group_size": {
|
82047 |
+
"6": 128,
|
82048 |
+
"5": 128
|
82049 |
},
|
82050 |
"bits": [
|
82051 |
6,
|
|
|
82060 |
"down_proj": {
|
82061 |
"group_size": {
|
82062 |
"8": 32,
|
82063 |
+
"6": 128,
|
82064 |
+
"5": 128
|
82065 |
},
|
82066 |
"bits": [
|
82067 |
8,
|
|
|
82077 |
}
|
82078 |
},
|
82079 |
"model.layers.39.self_attn": {
|
82080 |
+
"accuracy": 0.9963345830328763,
|
82081 |
+
"total_bits": 271615360,
|
82082 |
"q_proj": {
|
82083 |
"group_size": {
|
82084 |
+
"5": 128,
|
82085 |
"4": 128
|
82086 |
},
|
82087 |
"bits": [
|
82088 |
+
5,
|
82089 |
4
|
82090 |
],
|
82091 |
"bits_prop": [
|
82092 |
+
0.1,
|
82093 |
+
0.9
|
82094 |
],
|
82095 |
"scale_bits": 4
|
82096 |
},
|
82097 |
"k_proj": {
|
82098 |
"group_size": {
|
82099 |
+
"5": 128,
|
82100 |
"4": 128
|
82101 |
},
|
82102 |
"bits": [
|
82103 |
+
5,
|
82104 |
4
|
82105 |
],
|
82106 |
"bits_prop": [
|
82107 |
+
0.1,
|
82108 |
+
0.9
|
82109 |
],
|
82110 |
"scale_bits": 4
|
82111 |
},
|
82112 |
"v_proj": {
|
82113 |
"group_size": {
|
82114 |
+
"5": 64,
|
82115 |
+
"4": 64
|
82116 |
},
|
82117 |
"bits": [
|
82118 |
+
5,
|
82119 |
4
|
82120 |
],
|
82121 |
"bits_prop": [
|
82122 |
+
0.1,
|
82123 |
+
0.9
|
82124 |
],
|
82125 |
"scale_bits": 4
|
82126 |
},
|
82127 |
"o_proj": {
|
82128 |
"group_size": {
|
82129 |
+
"5": 128,
|
82130 |
"4": 128
|
82131 |
},
|
82132 |
"bits": [
|
82133 |
+
5,
|
82134 |
4
|
82135 |
],
|
82136 |
"bits_prop": [
|
82137 |
+
0.1,
|
82138 |
+
0.9
|
82139 |
],
|
82140 |
"scale_bits": 4
|
82141 |
}
|
measurement.json
CHANGED
@@ -16934,7 +16934,7 @@
|
|
16934 |
}
|
16935 |
},
|
16936 |
{
|
16937 |
-
"accuracy": 0.
|
16938 |
"total_bits": 899923248,
|
16939 |
"gate_proj": {
|
16940 |
"group_size": {
|
@@ -17038,7 +17038,7 @@
|
|
17038 |
}
|
17039 |
},
|
17040 |
{
|
17041 |
-
"accuracy": 0.
|
17042 |
"total_bits": 995125808,
|
17043 |
"gate_proj": {
|
17044 |
"group_size": {
|
@@ -17087,7 +17087,7 @@
|
|
17087 |
}
|
17088 |
},
|
17089 |
{
|
17090 |
-
"accuracy": 0.
|
17091 |
"total_bits": 1137906608,
|
17092 |
"gate_proj": {
|
17093 |
"group_size": {
|
@@ -17881,7 +17881,7 @@
|
|
17881 |
}
|
17882 |
},
|
17883 |
{
|
17884 |
-
"accuracy": 0.
|
17885 |
"total_bits": 206079360,
|
17886 |
"q_proj": {
|
17887 |
"group_size": {
|
@@ -21527,7 +21527,7 @@
|
|
21527 |
}
|
21528 |
},
|
21529 |
{
|
21530 |
-
"accuracy": 0.
|
21531 |
"total_bits": 143375360,
|
21532 |
"q_proj": {
|
21533 |
"group_size": {
|
@@ -24536,7 +24536,7 @@
|
|
24536 |
],
|
24537 |
"model.layers.12.mlp": [
|
24538 |
{
|
24539 |
-
"accuracy": 0.
|
24540 |
"total_bits": 614790432,
|
24541 |
"gate_proj": {
|
24542 |
"group_size": {
|
@@ -24588,7 +24588,7 @@
|
|
24588 |
}
|
24589 |
},
|
24590 |
{
|
24591 |
-
"accuracy": 0.
|
24592 |
"total_bits": 637728032,
|
24593 |
"gate_proj": {
|
24594 |
"group_size": {
|
@@ -46251,7 +46251,7 @@
|
|
46251 |
}
|
46252 |
},
|
46253 |
{
|
46254 |
-
"accuracy": 0.
|
46255 |
"total_bits": 925328640,
|
46256 |
"gate_proj": {
|
46257 |
"group_size": {
|
@@ -51850,7 +51850,7 @@
|
|
51850 |
],
|
51851 |
"model.layers.26.mlp": [
|
51852 |
{
|
51853 |
-
"accuracy": 0.
|
51854 |
"total_bits": 614790432,
|
51855 |
"gate_proj": {
|
51856 |
"group_size": {
|
@@ -55118,7 +55118,7 @@
|
|
55118 |
}
|
55119 |
},
|
55120 |
{
|
55121 |
-
"accuracy": 0.
|
55122 |
"total_bits": 267324160,
|
55123 |
"q_proj": {
|
55124 |
"group_size": {
|
@@ -55170,7 +55170,7 @@
|
|
55170 |
}
|
55171 |
},
|
55172 |
{
|
55173 |
-
"accuracy": 0.
|
55174 |
"total_bits": 271022080,
|
55175 |
"q_proj": {
|
55176 |
"group_size": {
|
@@ -62974,7 +62974,7 @@
|
|
62974 |
}
|
62975 |
},
|
62976 |
{
|
62977 |
-
"accuracy": 0.
|
62978 |
"total_bits": 271022080,
|
62979 |
"q_proj": {
|
62980 |
"group_size": {
|
@@ -64385,7 +64385,7 @@
|
|
64385 |
],
|
64386 |
"model.layers.33.self_attn": [
|
64387 |
{
|
64388 |
-
"accuracy": 0.
|
64389 |
"total_bits": 139115520,
|
64390 |
"q_proj": {
|
64391 |
"group_size": {
|
|
|
16934 |
}
|
16935 |
},
|
16936 |
{
|
16937 |
+
"accuracy": 0.9644066467881203,
|
16938 |
"total_bits": 899923248,
|
16939 |
"gate_proj": {
|
16940 |
"group_size": {
|
|
|
17038 |
}
|
17039 |
},
|
17040 |
{
|
17041 |
+
"accuracy": 0.9716205019503832,
|
17042 |
"total_bits": 995125808,
|
17043 |
"gate_proj": {
|
17044 |
"group_size": {
|
|
|
17087 |
}
|
17088 |
},
|
17089 |
{
|
17090 |
+
"accuracy": 0.9818738773465157,
|
17091 |
"total_bits": 1137906608,
|
17092 |
"gate_proj": {
|
17093 |
"group_size": {
|
|
|
17881 |
}
|
17882 |
},
|
17883 |
{
|
17884 |
+
"accuracy": 0.9735957626253366,
|
17885 |
"total_bits": 206079360,
|
17886 |
"q_proj": {
|
17887 |
"group_size": {
|
|
|
21527 |
}
|
21528 |
},
|
21529 |
{
|
21530 |
+
"accuracy": 0.9386431761085987,
|
21531 |
"total_bits": 143375360,
|
21532 |
"q_proj": {
|
21533 |
"group_size": {
|
|
|
24536 |
],
|
24537 |
"model.layers.12.mlp": [
|
24538 |
{
|
24539 |
+
"accuracy": 0.9032091200351715,
|
24540 |
"total_bits": 614790432,
|
24541 |
"gate_proj": {
|
24542 |
"group_size": {
|
|
|
24588 |
}
|
24589 |
},
|
24590 |
{
|
24591 |
+
"accuracy": 0.9056077301502228,
|
24592 |
"total_bits": 637728032,
|
24593 |
"gate_proj": {
|
24594 |
"group_size": {
|
|
|
46251 |
}
|
46252 |
},
|
46253 |
{
|
46254 |
+
"accuracy": 0.949563205242157,
|
46255 |
"total_bits": 925328640,
|
46256 |
"gate_proj": {
|
46257 |
"group_size": {
|
|
|
51850 |
],
|
51851 |
"model.layers.26.mlp": [
|
51852 |
{
|
51853 |
+
"accuracy": 0.8996819406747818,
|
51854 |
"total_bits": 614790432,
|
51855 |
"gate_proj": {
|
51856 |
"group_size": {
|
|
|
55118 |
}
|
55119 |
},
|
55120 |
{
|
55121 |
+
"accuracy": 0.9861949309706688,
|
55122 |
"total_bits": 267324160,
|
55123 |
"q_proj": {
|
55124 |
"group_size": {
|
|
|
55170 |
}
|
55171 |
},
|
55172 |
{
|
55173 |
+
"accuracy": 0.9868358941748738,
|
55174 |
"total_bits": 271022080,
|
55175 |
"q_proj": {
|
55176 |
"group_size": {
|
|
|
62974 |
}
|
62975 |
},
|
62976 |
{
|
62977 |
+
"accuracy": 0.9891389207914472,
|
62978 |
"total_bits": 271022080,
|
62979 |
"q_proj": {
|
62980 |
"group_size": {
|
|
|
64385 |
],
|
64386 |
"model.layers.33.self_attn": [
|
64387 |
{
|
64388 |
+
"accuracy": 0.9657053500413895,
|
64389 |
"total_bits": 139115520,
|
64390 |
"q_proj": {
|
64391 |
"group_size": {
|
output-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e0c887792f86c4d84ed160ca44b61acc544f14f134bd426199bee481452e6b1
|
3 |
+
size 8576095656
|
output-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c5cd11fa6af7d82bb788473a9206c3c9919c231d69a4830d815c60c1b38f84df
|
3 |
+
size 3079355660
|