diff --git "a/measurement.json" "b/measurement.json" new file mode 100644--- /dev/null +++ "b/measurement.json" @@ -0,0 +1,81949 @@ +{ + "last_module_idx": 86, + "measurement": { + "lm_head.linear": null, + "model.layers.0.mlp": [ + { + "accuracy": 0.9346602590460527, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9365404154125013, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9493538831409655, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.955032866252096, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9654192736274317, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9693455445139032, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9748893427221399, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9795014105345073, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9821855896397641, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9821217875731619, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848444728474868, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907539008479369, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923257051329863, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942640128888582, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954127045838457, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969898142704838, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998354353990994, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.0.self_attn": [ + { + "accuracy": 0.9710684669645209, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9732140522254141, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9757343169889952, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9813629216269443, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9876160856924558, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9877371293933768, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929364605953819, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930758452729175, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936146089120915, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941034975804781, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938258481653113, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994265009305979, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946208133509284, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951110207720807, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970555927016234, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997557459693206, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981545149103591, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998697975062226, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999486179728257, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.1.mlp": [ + { + "accuracy": 0.9342822275663677, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9364919223283467, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9446643465443662, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9473666517358077, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9664574610559564, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9696145936062461, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9730790571162575, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9813262882985567, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9830356318699686, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9827759187472495, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9850603075403916, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911828652808541, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925201025448347, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946975072747782, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.99559554222383, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996395571843574, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985197188430711, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.1.self_attn": [ + { + "accuracy": 0.9071176930477745, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.914151994805587, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.92625335643166, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9453292896873072, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9552830332203915, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9559151812603599, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.974050688116174, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9746136586917075, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9764351782045866, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9777905658671731, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9776067169089067, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.978968620300293, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9813501819183952, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9828077193937803, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898828189623984, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.991370185425407, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932334023086649, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953311991534735, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982398879763327, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.10.mlp": [ + { + "accuracy": 0.8836882490860789, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.8875289088801334, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9059687288183915, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9115418007499293, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.941918316640352, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9467621226059764, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9543565323478297, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9694892168045044, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9722654411667272, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9703863074904994, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9740713160288962, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848234559360304, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9870189067564512, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.991679637055648, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922292954043338, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939461213193441, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977800967661958, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.10.self_attn": [ + { + "accuracy": 0.9263059779217369, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9309069231936806, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9400983672392995, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9592840797022769, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9639544769337303, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.964439963039599, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9797157362887734, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9801823936010662, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9815584609383031, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9824342759031999, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9819491960500416, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9830308634983865, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9849607348442078, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9860986188838357, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918523964128996, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930453504386701, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947314513357062, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964692623991716, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986339463411193, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.11.mlp": [ + { + "accuracy": 0.890041288576628, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.8936888293216103, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9105060351522345, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9156720638275146, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9450010876906545, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9496108136678997, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9565948530247337, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9710654490872433, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9737603084037179, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9718709807646903, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9754607646088851, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.985605504951979, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.987730390147159, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921101127800188, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926304244681409, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942205187521482, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978865065464848, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.11.self_attn": [ + { + "accuracy": 0.9263322918038619, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9308844867505526, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9403544789866397, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9590463889272589, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9639821585855985, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9645266062334964, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9797430728611193, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9802339751469461, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9815460035675451, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9824114739894867, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9819922306035694, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9830595756831922, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9851039302976508, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9861552495705453, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919508672074268, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993071754512034, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947448929673747, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964538169534582, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986369649046346, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.12.mlp": [ + { + "accuracy": 0.8918708374625758, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.89517511192121, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9122108597504466, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9174456659116244, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9456974707151714, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9502452800148412, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9574622072671589, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9715933925227115, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9742925449421531, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9722854149969, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9758415786843551, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9858240830270868, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9879348536855296, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922802314946526, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927528069207543, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943203094758486, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979443538345789, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.12.self_attn": [ + { + "accuracy": 0.9291172404038279, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.933071073732878, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.940508108390005, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9578203056987963, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9650363577039618, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9655136466026306, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9794716944820002, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9798995554447174, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9813989542032543, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9825067551512467, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9825822993328697, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9836115727299138, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9850869853245584, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9861544700045335, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919355064630508, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930753370648936, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947560907978761, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962939972940245, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986319028233227, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.13.mlp": [ + { + "accuracy": 0.8814315293964586, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.8849213248804996, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9034859004773592, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.909073183411046, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9404569487822683, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9455924222343847, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9533619347371554, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9689362958857888, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9718877830003437, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9695984783925509, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9735954686215049, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844553878432826, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9868131436799702, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915454121012437, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920539675574553, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937360408274751, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977603758636274, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.13.self_attn": [ + { + "accuracy": 0.9294261304955733, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.933771340470565, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9412378009996916, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.95713041958056, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9653569271689967, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9657783320075587, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9794830225015941, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9798735897792014, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9812933081074765, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9824039763525912, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9826268224339736, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9837037968008142, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9852250798752433, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9863390467668834, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920071786955783, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931649110819164, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946799835092143, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962449928647593, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986130050138423, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.14.mlp": [ + { + "accuracy": 0.8874150702827855, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.8909437405435663, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9086750369322927, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9140805507961073, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9434397032386378, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9483468407078793, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9558564487256502, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.970441642560457, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9732304905590258, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9711033356817145, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9749233205067483, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9852277009110701, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9874866518535113, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919441177656776, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.992455962457155, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940594873930279, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978704591722865, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.14.self_attn": [ + { + "accuracy": 0.9255051675595736, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9285134014330412, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.936667944255628, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9557082998125177, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9626724374921698, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9630817770957947, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9793466768766704, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9797434320575312, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9811635660497766, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.98215850717143, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9812532710401636, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9824080059402868, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9841493995566117, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9853004060293499, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914199121688542, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926350516708273, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946639086855086, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962192452267596, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986201266905195, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.15.mlp": [ + { + "accuracy": 0.8872005563033254, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.8907047698372289, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9084585089432566, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9138783718410292, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.943139634634319, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9480621532389992, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9556305879040768, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9703003136735213, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9731088813982511, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9709202396242242, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9747886626343978, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9851445947822771, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9874203197265926, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919184854156092, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924009395273108, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939819648861885, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978700774280649, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.15.self_attn": [ + { + "accuracy": 0.9138055914326718, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9186558597966245, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9283871838920995, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9475897268245095, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9575265269530446, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9580677622242978, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9757257276459744, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.976254769061741, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9777299150040275, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9789402218241441, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9787302017211914, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9800065708787817, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9821411073207855, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9834397482244592, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903281150679839, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917210105218386, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936866383803519, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956195385832536, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983627834406338, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.16.mlp": [ + { + "accuracy": 0.877478185452913, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.8812567560296309, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9007980635291651, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9069209788974962, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9378971175143593, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9433071613311768, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9516827997408415, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.967318992865713, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9705559454466167, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9681974022012008, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9724362743528265, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9837344006488198, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.986226172823655, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911043055747685, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916721394187525, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934408688231519, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976050861571965, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.16.self_attn": [ + { + "accuracy": 0.9208068282980668, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9240708413876986, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9322386352639449, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9518744914155257, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9602770962213215, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9607125112884923, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9778951829985568, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9782986029198295, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9797300476776926, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9810124118077127, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9800276646488592, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9812748338046827, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9830107437936884, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9843067809155113, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907671307262621, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921554327011108, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942762043915296, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959355329996661, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985207739825311, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.17.mlp": [ + { + "accuracy": 0.8744485378265381, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.8784112177397075, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.897544685162996, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.90340172617059, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9366201664272108, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9423388619171946, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9503052673841778, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9667364609868903, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9699691126221105, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9674731649850544, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.971894599889454, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9833247143971292, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.985923746698781, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908622793461147, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914541981722179, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931506404751226, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975595184062657, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.17.self_attn": [ + { + "accuracy": 0.8884038172270122, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.8947706159792448, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9080569806851839, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9317291598570974, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9439692120803029, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.944728085869237, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.967449781141783, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9681149972112555, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9704180297098661, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.972085322204389, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9717612203798796, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9736895263195038, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9765094675515827, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9784036090499476, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.98714085707539, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891782426520398, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914995918148443, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940445952509579, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978002896042246, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.18.mlp": [ + { + "accuracy": 0.8828967746935392, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.8866006324165746, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9035055386392694, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9084749096318295, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9414117900948775, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9466919208827772, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9536941678900468, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9696385640847055, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9724219127705223, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9700841715461329, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9741366144857908, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9847365696179239, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871018975973129, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916819875177584, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921777962069762, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936012786469961, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977897287983644, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.18.self_attn": [ + { + "accuracy": 0.9084258393237465, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9124037717518053, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9226306425897699, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9422685096138402, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9542849754032335, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.954944045920121, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9735538865390577, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9742454572727806, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9759087013570886, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9772494482366663, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9771036725295218, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9783839661824075, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9804849954027879, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9818826396214334, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895264718093371, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909281832607169, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932172494499307, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951164573431015, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982306672946403, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.19.mlp": [ + { + "accuracy": 0.8843377514889366, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.8878840647245708, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9047610696993376, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9098067785564222, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9421150119681108, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9469896178496511, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9541561038870561, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9699149759192216, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9726296537800839, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9704949667579249, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9743461765741047, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9849505800949899, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9872085930485475, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917936881906108, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923192403818432, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937901108672744, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978330135345459, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.19.self_attn": [ + { + "accuracy": 0.9107770292382491, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9155982481805902, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9254188474855924, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9455470160434121, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9560164056326214, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.956543765569988, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9742324760085658, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9747130588481301, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9764793640688846, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9777518855898004, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9779316547669863, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9791740445714248, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9812069347030238, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9826090884836096, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898915980991564, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912998441018557, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932584503763601, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952333691088777, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982641300088481, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.2.mlp": [ + { + "accuracy": 0.9045968745884142, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9074665621707314, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9207454794331601, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9249788334495143, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9516958594322205, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9556884859737597, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9613953326877794, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9745012898194162, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9767372106250963, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9752846234723141, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9783921194703955, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9873218426578924, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891657797913802, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929474584366146, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935303734321344, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947835026602996, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980351097489658, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.2.self_attn": [ + { + "accuracy": 0.9123480821910658, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9162866316343609, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9281137491527357, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9483038594848231, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9564930294689379, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9570564345309609, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.976576500817349, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9770574993208835, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9785486789126145, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9795701974316647, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9782472095991436, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9795291204201547, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9821275488326424, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9834104729326147, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903291652077123, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916866907947942, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939255659517489, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958553812221477, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984231165757305, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.20.mlp": [ + { + "accuracy": 0.883927721726267, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.8874366032449823, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9040897645448384, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9091972928298147, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9412761111008494, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9462708485753912, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9534119869533338, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9694332511801469, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9721419434798392, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9700496635938946, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9739486731980976, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9847425018486223, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.986997915725959, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916807718967137, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.992214665601128, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936658132233118, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997799445924006, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.20.self_attn": [ + { + "accuracy": 0.9261414879246762, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9287611371592471, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9366979849965948, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.951509061612581, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9631240242405942, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9635281343209117, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9789037484871713, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9793224475885692, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9804524889117793, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9815157949924469, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9813631804365861, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9824942883692289, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9841040027768988, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9852228415639777, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913552505405325, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.992654415337663, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944602325558662, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960805463947748, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985663882015567, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.21.mlp": [ + { + "accuracy": 0.88530895584508, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.8891891178331877, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9051733079709505, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9099900910728856, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9424008444735879, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9473711032616465, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9541140010482386, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9697986747089186, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9724023310761702, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9705570246043959, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9744595336286646, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.984977839808715, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9872530071358931, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916452490969708, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923539796942159, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937382493364183, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978153046808744, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.21.self_attn": [ + { + "accuracy": 0.9069895556098536, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9110806678470812, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9225464431863082, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9424227664345189, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9535980726543226, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9543071514681766, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9733191446254128, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9740553388470098, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9758665906755548, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9770550555304477, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9768782399202648, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9782454324396033, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9806783199310303, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9820670861946909, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895566945013247, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910348638107902, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931329433855257, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952206019508211, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982132541113778, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.22.mlp": [ + { + "accuracy": 0.8859554340964869, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.8900298570331774, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9058535349996466, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.91086408966466, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9424036992223639, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9473960556482014, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9541888111516049, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9696206199495416, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9722509917459989, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9705470486691123, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9744273361406828, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9849674764432406, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9872355814042845, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916582633005945, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923342427140788, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937351745994467, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977849780728942, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.22.self_attn": [ + { + "accuracy": 0.9180448870909841, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9215149503005178, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.931351153474105, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9496503974262037, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9588359970795481, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9594388949243646, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9761306690542322, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9767344907710427, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9783888600374523, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9795840210036227, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9794165680282995, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9806065120195088, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9826514187612032, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9839379285511217, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.990632279138816, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919405595252389, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938123677121965, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957471078163699, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984040071109408, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.23.mlp": [ + { + "accuracy": 0.88538616581967, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.8896257124449077, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9057448600467882, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9108010154021413, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9423111614428068, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9473961403495387, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9542902112007141, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9693328800954317, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9719723431687606, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.970395565032959, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9743403337503734, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848544048635584, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871929907485059, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915029194794203, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922653860167453, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936952151750263, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977312886009091, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.23.self_attn": [ + { + "accuracy": 0.9178524017333984, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9211537775240446, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9302882081583926, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9476905716092963, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9587831842271906, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9593203601084257, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9762575469518963, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9768429759301638, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9783897635183836, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.97942306493458, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9792919378531607, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9805250810949426, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9823770711296483, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.983679125183507, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.990443763764281, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.991789217057981, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938293652314889, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955993184917852, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983721604864848, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.24.mlp": [ + { + "accuracy": 0.8870599269866943, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.8916945457458496, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9084899550990054, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9139168136998227, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9432696668725264, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9480592137888858, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9553383526049162, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9695946103648135, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9722640922195033, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9709495243273283, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9747076881559271, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.985167250821465, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9873514057774293, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916048410691713, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924715255436144, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940450046407548, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977433924612246, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.24.self_attn": [ + { + "accuracy": 0.9246881384598582, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9287621974945068, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9374134791524786, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9538935391526473, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.962536981231288, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9630432724952698, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9779086803135119, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9783807428259599, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9797923878619546, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9807372689247131, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9813162951092971, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9823374795286279, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9840551050085771, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9853140209850512, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913850607056367, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926514656920182, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942969970012966, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960499198028916, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985366548950735, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.25.mlp": [ + { + "accuracy": 0.8856831098857679, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.8899902293556615, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9072511509845131, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9127009165914435, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9424153691843936, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9473517192037482, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9547803872510007, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9694524062307257, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9721151088413439, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9705099526204561, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9743231221249229, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9849243728738082, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871587674868735, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915680140256882, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922996673144793, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938707222279749, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977291336185053, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.25.self_attn": [ + { + "accuracy": 0.9160962481247752, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9211133530265406, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9294311059148688, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9469149049959684, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9580121855986745, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9586484432220459, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9753491580486298, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9760584893979525, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9777768348392687, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9787655300215671, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9789688116625735, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9802263065388328, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.981923122155039, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9833274436624426, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901032110578135, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916176121485861, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934654435829112, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955588477222543, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983065630260267, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.26.mlp": [ + { + "accuracy": 0.8866521433780068, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.8910927145104659, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9076863339072779, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9130546607469258, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9429527521133423, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9478055270094621, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9549664196215177, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9694053781659979, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9720532674538461, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9708038850834495, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9745876506755227, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9850984482388747, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9873127913788745, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915475664954436, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.992448973812555, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939429705080233, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997709263704325, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.26.self_attn": [ + { + "accuracy": 0.9269709273388511, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.930440952903346, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9385320512871993, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9538965727153578, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9631926668317694, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.963949702287975, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.978104840768011, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9790072284246746, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.980301569951208, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9813458746985385, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9818006832348672, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.982958414052662, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9845771679752752, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.985762241639589, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916147040693384, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928665317987141, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944289012959129, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962162571518045, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985520028950352, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.27.mlp": [ + { + "accuracy": 0.8917075207358912, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.8961978711579975, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9127908756858424, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9184149503707886, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9456631321656077, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9503073064904464, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9573905624841389, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9704082388626901, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9731447790798388, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9722489683251632, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9758260030495493, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9858315336076837, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9879178796943865, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918676003029472, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928471940128427, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943832315896687, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997793107832733, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.27.self_attn": [ + { + "accuracy": 0.91286469760694, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9180008298472354, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9289040502748991, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9512524730280826, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9561269000956887, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9570591637962743, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9731471318947641, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9742240843019987, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9765181776724363, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9777374424432453, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9781703588209654, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9795968924698076, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9814510565055045, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9830430777449357, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897958071608293, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915148404083753, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929275128402208, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956841402147946, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981592607341314, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.28.mlp": [ + { + "accuracy": 0.895037801642167, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.899045887746309, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.915137529373169, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9204442501068115, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9472402083246332, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9517654745202315, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9586802250460574, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9715813837553325, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9741966222461901, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9730088773526644, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9765094016727648, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9862080984993985, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9882605107207048, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922122367118534, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929908992428529, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944547363802007, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978765708051229, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.28.self_attn": [ + { + "accuracy": 0.930534375341315, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9345188391836066, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9413077643043116, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9535483937514455, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9650114962929174, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9663626520257247, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.976794150314833, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9786935605500874, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9811569624825528, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9821593933983853, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9829521634076771, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9841372119752985, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848556942061374, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9862220616717088, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915570381440615, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930555608711744, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994072796090653, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961391777584427, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984459429979324, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.29.mlp": [ + { + "accuracy": 0.900224158638402, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9040186844374004, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9192641848012021, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9241349006953993, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9498954227096156, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9541931246456347, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9606327759592157, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9731620895235162, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9755343010551051, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9743133893138484, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9776627013557836, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9868509494944623, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888279218422739, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925590228093298, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933126717805862, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946630483395175, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979770244344285, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.29.self_attn": [ + { + "accuracy": 0.930271631792972, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9347113182670191, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9435421102925351, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9551752146921659, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9649795670258372, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.966516237509878, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9763915193708319, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9782072556646246, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9802924880855962, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9816377319787678, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9832670829798046, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9845319923601652, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9858600528616654, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9869602973523893, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916656895687705, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934891556438646, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935463874747879, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963954106757515, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982829003741867, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.3.mlp": [ + { + "accuracy": 0.9024018927624351, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9055826977679604, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.919161922053287, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9238067865371704, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9505989520173324, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9545966010344655, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9606011196186668, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9735862142161319, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9758990752069574, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9747165190546137, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9777593691098062, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9870083504601529, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888114395894503, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927070752570504, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933610346756483, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947268829533928, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979420461152729, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.3.self_attn": [ + { + "accuracy": 0.9158677176425332, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9212387486508018, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9336258800406205, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9536180872666209, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9586109330779627, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9592750041108382, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9768670044447246, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.977465116663983, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9790542847231815, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9800388467939276, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.979376683109685, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9806353340023443, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9833314544276187, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9845860569100631, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909456458530927, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922767312903154, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939831032564765, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961418234988263, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984338414904318, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.30.mlp": [ + { + "accuracy": 0.9042556725050274, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9077072331779882, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.921883037215785, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.926497315105639, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9517323625715155, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9557517641469052, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.961874312476108, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9742046139742199, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9764852680658039, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9752902074864036, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9784340482009085, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9873820535446468, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892232559229198, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929304397419879, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993582555337956, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948678412719777, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980807441629862, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.30.self_attn": [ + { + "accuracy": 0.9428510665893555, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.946467073340165, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9517648659254375, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9636179553835016, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9708133120285837, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9724559031034771, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9805720056358137, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9825399690552762, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9841706941002294, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9850701454438662, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9857744731401142, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9868495307470623, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9874915527670007, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9887069385302695, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928492582158038, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943177656907785, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947274033176271, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968884316714186, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986295688309168, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.31.mlp": [ + { + "accuracy": 0.9087948924616763, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9119861627879896, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9251496478130943, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9293349793082789, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.954120723824752, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9579206673722518, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9635603898449948, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.975611084385922, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9777848924461164, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9765865536112535, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9795536273404172, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.988070052705313, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.989798367023468, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993367778627496, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939355058105368, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950803761419497, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981997958139369, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.31.self_attn": [ + { + "accuracy": 0.9509983345081932, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9538805798480385, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9593412499678762, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9697634201300772, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9750145736493563, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9758063899843317, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.983853911098681, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848604406181135, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.986473118004046, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.987108261961686, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9876350791830766, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9885242048062777, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894347669262635, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903045304511723, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940445148631146, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950999539149435, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995781660079956, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973115585744381, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998877693653891, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.32.mlp": [ + { + "accuracy": 0.9130361456620066, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9159503924219232, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9278517083117837, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9316335916519165, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9561146466355575, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.959665486687108, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.964857910808764, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9768086499289462, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9788576458629809, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9775907616866263, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9804181456565857, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9885891354397723, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9902305665769076, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936984334337083, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941922575235367, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952343435663926, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983038835619625, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.32.self_attn": [ + { + "accuracy": 0.945003446779753, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9481485674255773, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9532996384721053, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9648196383526451, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9717028737068176, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9729851264702646, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9811561876221707, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.982742820915423, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9845284129443922, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9856914297530526, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9862948216890034, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9872789241765675, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9879361908686789, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.989105467733584, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929821640253067, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945366284564922, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994769529292458, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970194115058372, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998616767066874, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.33.mlp": [ + { + "accuracy": 0.9164149823941683, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9191543491263139, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9300094591943842, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9334600787413747, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9578924712381864, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9612106367161399, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9659476531179327, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9778211493241159, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9797457597757641, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9785679751320889, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9811797047916212, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891011346327631, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906232294283415, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940089559868762, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944604658766797, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954060492546934, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983911099598596, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.33.self_attn": [ + { + "accuracy": 0.9473568326548526, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9498882764264157, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.956181996747067, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9662274561430279, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.973601830633063, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9745375623828486, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9827987441891118, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9840707527963739, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9859150240295812, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9865271182436692, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871097912913874, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9880025716204393, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888220656859247, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899101853370667, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935008040384242, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.99489258935577, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951933496876767, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971964180861649, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987586317093748, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.34.mlp": [ + { + "accuracy": 0.9199637425573248, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9224993931619745, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9324342075147127, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9355867912894801, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9595104581431338, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9627045612586171, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9670942425727844, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9786953047702187, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9805383682250977, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9793563720427061, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9818968882686213, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895031514920687, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909773116049013, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942376911640167, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994661492736716, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995528019964695, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984485336432332, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.34.self_attn": [ + { + "accuracy": 0.947713055108723, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9505773688617506, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9585475858889128, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9680181020184567, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9736087573202032, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9747442242346311, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9822374535234351, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.983612763254266, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.985236141242479, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.986031849133341, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9872145401804071, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9881564033658881, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891908717782874, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901496697413293, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937170728256828, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950356808932204, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951564171596577, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972085607679266, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987203798403865, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.35.mlp": [ + { + "accuracy": 0.9195801145152042, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9221632857071727, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9319511338284141, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9350945008428473, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9591880158374184, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9624335703096891, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9667874951111644, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9784966142554032, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9803647712657326, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9791693875664159, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9817474547185396, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894041944491235, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.990900299266765, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941822729612652, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994609035551548, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995469736817636, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984255867373002, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.35.self_attn": [ + { + "accuracy": 0.9471979266718814, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9502266865027578, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9561342283299095, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9660673110108626, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9726612128709492, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9744738779569927, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9818736236346396, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9839371850616053, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9856157428339908, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9864593587423626, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9870603178676806, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.988065849009313, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888184149014322, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899031464990816, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934841480694319, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949242958897039, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950945906733212, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972074459257879, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986884505733064, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.36.mlp": [ + { + "accuracy": 0.9201044157931679, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9227177406612196, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.932390432608755, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.935537388450221, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9595663861224526, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.962770330278497, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9670429449332387, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9785852494992708, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9804564008587285, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9793522797132793, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9819067669542212, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894857736010301, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909706821567134, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941990069652858, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946551322937012, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955039514522803, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984319061040878, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.36.self_attn": [ + { + "accuracy": 0.9494279058356034, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9515824631640786, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9575687082190263, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9674271470622012, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.973702283282029, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9745934887936241, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9835255491106134, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9846188912266179, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9857694428218039, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9866255161009336, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.987001044185538, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878534251137784, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888901428172463, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899027080912339, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936369618302897, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.99495971516559, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952785357048637, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972448196066054, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987664680535856, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.37.mlp": [ + { + "accuracy": 0.9223337173461914, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9248226943768953, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9343676818044562, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9374674495897795, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9605877964120162, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9637158513069153, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9679550842234963, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9791202388311687, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9809410399512241, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9798654035518044, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9823411263917622, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.989741883779827, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911860698147824, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943365238999066, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947786578222325, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956281392982131, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984743850991914, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.37.self_attn": [ + { + "accuracy": 0.9387267639762477, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9431701961316561, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9506164099040785, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9608818449472126, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9682322050395765, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9696734798581976, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9783013682616385, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9799962373156297, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9822473871080499, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9832992726250699, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848844534472415, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9860847262959731, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9873152437962984, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9884787625388095, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924622883922175, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941698928412638, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939637740975932, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966429342564783, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984200164479645, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.38.mlp": [ + { + "accuracy": 0.920904793237385, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9234808369686729, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9331640569787276, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9364298393851832, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.95981554608596, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9630585093247264, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9674015327503807, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.978610844988572, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9805176258087158, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9794434026667946, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9820303807133123, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895596614009455, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910200763689844, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942003147382485, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946947380116111, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955708211974094, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984124779309097, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.38.self_attn": [ + { + "accuracy": 0.9590149082635578, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9606210934488397, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9673869766687092, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9753456649027372, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9787578974899492, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.980512880965283, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9864131933764407, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883504039362857, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895005830024418, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899192562228755, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900426613657098, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907360390612954, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916701442316959, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924012775483885, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952393883937284, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960488343709394, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964819609334594, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980363177233621, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990917206986955, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.39.mlp": [ + { + "accuracy": 0.9243681117107994, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.926798795398913, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9365287078054327, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9397870364942049, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9615223407745361, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9646308171121698, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9689964840286657, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9794278803624605, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9812573618010471, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.980322633918963, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9827801663624612, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899722196553883, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913958946340963, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943863745582732, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949104809447339, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957995391205737, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984521217840282, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.39.self_attn": [ + { + "accuracy": 0.9395060978437725, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9418707835046869, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9501557318787826, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9630306363105774, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9686340219096133, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9695581636930767, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9811664992257169, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9821124908171202, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.983684208832289, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9843733153845134, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9845250233223564, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9855945063264746, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871241591478649, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883171406231428, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928836548014691, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940999048320871, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949717651072302, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968919224644962, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987151621791878, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.4.mlp": [ + { + "accuracy": 0.8985492116526553, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.901805319284138, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9150803026400114, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.919344425201416, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9490134559179607, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9532915855708876, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9588577182669389, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9731355842791105, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9754819917051416, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9739737181287063, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.977186066539664, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9866826628383837, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9885710713110472, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926522703547227, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931838041857669, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994412137097434, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980290886995039, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.4.self_attn": [ + { + "accuracy": 0.9142496146653828, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9198124095013267, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9327549369711625, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9523416443874961, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9575716696287456, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9583029841121874, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9758769760006353, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9764843739961323, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9781012456668051, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9791382284540879, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9787211135814065, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9799930578783939, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9828271191371115, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9840973568590063, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906336147534219, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.992002618940253, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936561670742536, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958682930783221, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983478706133994, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.40.mlp": [ + { + "accuracy": 0.9280669940145392, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9304077499791196, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9394871184700414, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9426282707013582, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9633799446256537, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9663516753598264, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9703927855742606, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9803044309741572, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.982066744252255, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9812757137574648, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9835974229009528, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904403208117736, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918089744291807, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946218395703718, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951422449789549, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959748010886343, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985063955383865, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.40.self_attn": [ + { + "accuracy": 0.9346072297347219, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9383607224414223, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9502093352769551, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.96327532278864, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9665598555615074, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9676608662856252, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9783052770715011, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9795472559175993, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9810372355737185, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9819600268414146, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9837902752976668, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848497792294151, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9868959510012677, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878980885995062, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926277375534961, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939494344748949, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942636383991492, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967636181727836, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985059582089123, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.41.mlp": [ + { + "accuracy": 0.9481586876668429, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9497779795998021, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.956284485365215, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9584317301449022, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9735647565440128, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9757176684705835, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9786364655745657, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9858373842741314, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871232776265395, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9864894700677771, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9881650436865655, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931160431159171, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940901630018887, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961605166134081, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965148583838814, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971149893183457, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998972020259029, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.41.self_attn": [ + { + "accuracy": 0.9584259328089262, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9614708392243636, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.969149583264401, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.976904593015972, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9786684356237713, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9791906617189708, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9873821900079125, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878443369739934, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886508160515836, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9890493107469458, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895177358075192, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901464330522638, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917999945188823, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924777667773398, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954838195913717, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962066470792419, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996531991190032, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982457204084647, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999114755835188, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.5.mlp": [ + { + "accuracy": 0.8961093112042076, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.8995043854964406, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9174685478210449, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9235247561806127, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9480728507041931, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.952238961269981, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9600803161922254, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.972139414988066, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9747872823163083, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9733782316509046, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9765762435762506, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9862785480524364, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.988214879443771, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923801524074454, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929790496826172, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948684620229822, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997935845467605, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.5.self_attn": [ + { + "accuracy": 0.9197947477039538, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9251509465669331, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9356879435087505, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9550359719677975, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9606386109402305, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9612202048301697, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9771611816004703, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9776718992935983, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.979504877015164, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9805318226939753, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9803342991753629, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9815588922877061, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9837921591181504, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9849617920423809, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912156737164447, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924768468267039, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941324677906538, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961310832908279, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984739926692686, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.6.mlp": [ + { + "accuracy": 0.8929096272117213, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.8961125486775449, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9133353735271253, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9188690687480726, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9459498618778429, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9504654376130355, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9577331919419139, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.971407015072672, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.974143131783134, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9722574823781064, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9757717941936693, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9857272897896013, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878439511123457, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921659293927645, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926692176806299, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943906625634745, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997864868687956, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.6.self_attn": [ + { + "accuracy": 0.9221241662376806, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9271972367638036, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9369838112278989, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.957046201354579, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9621911143001757, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.962721730533399, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9784806041341079, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9790133159411581, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9803351665797987, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9813495466583654, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.981099867507031, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9822150374713697, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9842419859610105, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9853844877920652, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914709102166327, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926828765555432, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944141852228265, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962576886540965, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985493733302543, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.7.mlp": [ + { + "accuracy": 0.883547958574797, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.8869107773429469, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9059860455362421, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9118572599009463, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9415065614800704, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9465115760502062, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9543330041985763, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9691803455352783, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9721149739466215, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9699868089274356, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9738362023704931, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9845667967670843, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9868806545671663, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915455783668318, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920725171503267, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939151535692968, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977291910664031, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.7.self_attn": [ + { + "accuracy": 0.9220422694557592, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9275187630402415, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.936932551233392, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.954965657309482, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9618771358540184, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9624796258775812, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9777556218599018, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9783260822296143, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9798300956424913, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.980840370843285, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9809433174760718, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9821023752814845, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9841111239634062, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9852699825638219, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913591177839982, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925977046552458, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941853841668681, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960515953992543, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984803964432917, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.8.mlp": [ + { + "accuracy": 0.8842033963454397, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.8877226177014803, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9065986683494166, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9124087597194471, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9422692625146163, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9470931166096738, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9548533778441579, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9696872171602751, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9725308951578642, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9704963408018413, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9742089682503751, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848638867077074, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9870833503572565, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917323534425936, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922276217686502, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939880378936466, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977556018060759, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.8.self_attn": [ + { + "accuracy": 0.9161534874062789, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9204538433175338, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.931653198442961, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9537342912272403, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9583287490041632, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9589311637376484, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9772662755690122, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9778448186422649, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9793039905397516, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9802920457563902, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9792278785454599, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.980466536785427, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9830341950843209, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9843043167340128, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908395456640344, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.992160647323257, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941237788451346, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960825509930912, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984877295792103, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.9.mlp": [ + { + "accuracy": 0.8909975729490581, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 345170240, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.8943474167271664, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 358015296, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9109228974894473, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 399599872, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9161535250513178, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 448342272, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9450994416287071, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 505238240, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9498880097740575, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 519110144, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9567827958809703, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 558117472, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.971051507874539, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 637706912, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9737889123590369, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 647101952, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9718540216747084, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 656741088, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9755156275473142, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 670612992, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9855307638645172, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 808243936, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9877233364080128, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 822115840, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920510649681091, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 935891680, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925882102627503, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 967974784, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941979466300261, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1055137664, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978647296758074, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1238638464, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.9.self_attn": [ + { + "accuracy": 0.9293622907839323, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 93809536, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9334898371445506, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 97119104, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9426421617206774, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 103179840, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9576557498229177, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117628992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9649758119332164, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 138958272, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.965507055583753, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 139188992, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9804034327205858, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178017728, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9808614504964728, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 178248448, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9821841889306119, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 179860992, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9830374435374611, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182163200, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9825682577333952, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 183229184, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9836378254388508, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 184612352, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9856920791299719, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 190704512, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9867359368424666, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 193353472, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922285738744234, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 233592256, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993356770590732, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 237393664, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994939502916838, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 266098112, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965374030564961, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 284923648, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986898877510899, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 354178496, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.norm.norm": null + } +} \ No newline at end of file