diff --git "a/measurement.json" "b/measurement.json" new file mode 100644--- /dev/null +++ "b/measurement.json" @@ -0,0 +1,31223 @@ +{ + "last_module_idx": 34, + "measurement": { + "lm_head.linear": null, + "model.layers.0.mlp": [ + { + "accuracy": 0.9463801321230436, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 113182032, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9480113512591312, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117376336, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9563822307084736, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 131087360, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9593624943181088, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 147012608, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9751329171030145, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165434336, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9776772433205655, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 169691136, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9815935272919504, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182412768, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871811270713806, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 208362720, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9885265191918925, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 211437568, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9877794392799076, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 214914016, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9893180602475217, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 219170816, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937298133185035, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 264393696, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945968883602243, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 268650496, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964714077742476, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 305746912, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967200454128416, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 317592064, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975143327916923, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 345903616, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987442609118787, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 404623872, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.0.self_attn": [ + { + "accuracy": 0.9047197040758634, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 22550528, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9259027305402254, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 23336960, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9363632202148438, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 24220928, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9528487167860332, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 28283392, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9584701312215704, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33360896, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9599877815497548, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33394432, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9745401928299352, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42535936, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9754576824213329, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42569472, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9781504524381537, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 42933760, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9791662379315025, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 43528192, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.980703984436236, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 43880192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9813871446408724, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 44228096, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9839421510696411, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 45094912, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9854497266443152, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 45608960, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911978934940538, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 55249920, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928100273797387, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 56094720, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936721493539057, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 63507456, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966174866023817, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 66596864, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982388392090797, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 84478976, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.1.mlp": [ + { + "accuracy": 0.9865066111087799, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 113182032, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844785458163211, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117376336, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9872358692319769, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 131087360, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9885954958827872, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 147012608, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993292468943094, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165434336, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946399754599521, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 169691136, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996082505505336, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182412768, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963871315121651, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 208362720, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971440885412065, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 211437568, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962872937321663, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 214914016, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965098519858561, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 219170816, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983312675827428, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 264393696, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985444844749413, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 268650496, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989669655302638, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 305746912, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989427632015002, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 317592064, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991988504893685, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 345903616, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993557764315292, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 404623872, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.1.self_attn": [ + { + "accuracy": 0.9611551730256331, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 22550528, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9639073673047518, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 23336960, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9651901376874823, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 24220928, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9795747916949423, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 28283392, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9815582774187389, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33360896, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9816228951278486, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33394432, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896486087849266, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42535936, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896860012882635, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42569472, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.990466044921624, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 42933760, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912410103961041, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 43528192, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906435671605562, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 43880192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912577271461487, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 44228096, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914819135477668, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 45094912, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921024637786966, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 45608960, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952611131103415, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 55249920, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960255516987098, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 56094720, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972374572565681, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 63507456, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978178057231402, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 66596864, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991666369728351, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 84478976, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.10.mlp": [ + { + "accuracy": 0.9890530203518114, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 113182032, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892657022727164, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117376336, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906717264338544, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 131087360, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910578484597959, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 147012608, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944616352256975, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165434336, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994902815865843, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 169691136, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954982976380148, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182412768, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971370716628275, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 208362720, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973946252935811, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 211437568, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971534799980489, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 214914016, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975044995938477, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 219170816, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985246409318949, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 264393696, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987265733315757, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 268650496, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991782051266024, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 305746912, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992070347560864, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 317592064, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993220122629091, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 345903616, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996956828246383, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 404623872, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.10.self_attn": [ + { + "accuracy": 0.9830673823231145, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 22550528, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9836182955064272, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 23336960, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9879190913940731, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 24220928, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904288265265917, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 28283392, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918054954001778, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33360896, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920769710289804, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33394432, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958284148260167, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42535936, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960657954216003, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42569472, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996395059322056, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 42933760, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965301552101186, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 43528192, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961719752142304, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 43880192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964521715515539, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 44228096, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973696301642218, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 45094912, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975571985307493, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 45608960, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985502734780312, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 55249920, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987607937502233, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 56094720, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989237845140068, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 63507456, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994014044243255, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 66596864, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999642600133819, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 84478976, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.11.mlp": [ + { + "accuracy": 0.9889882120646929, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 113182032, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892291128635406, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117376336, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.990500829721752, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 131087360, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908790266827533, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 147012608, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.99441457147661, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165434336, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948649727984479, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 169691136, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954272089035887, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182412768, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.99709448747729, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 208362720, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973673183274897, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 211437568, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971322897625597, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 214914016, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974843245979986, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 219170816, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985071008926943, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 264393696, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987171574642784, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 268650496, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999169109193118, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 305746912, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992012493312359, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 317592064, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993061413008132, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 345903616, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996925961520327, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 404623872, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.11.self_attn": [ + { + "accuracy": 0.9857747680262515, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 22550528, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9861991562341389, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 23336960, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9890790095454768, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 24220928, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917377366831428, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 28283392, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934980034043914, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33360896, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935771466085785, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33394432, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996627362150895, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42535936, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967776576155111, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42569472, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969593306121073, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 42933760, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971002524620608, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 43528192, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968900315855679, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 43880192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970161640330365, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 44228096, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976326648734117, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 45094912, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977859852737502, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 45608960, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987101260768739, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 55249920, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988884333717195, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 56094720, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991094084750665, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 63507456, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994293450819034, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 66596864, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996801441241252, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 84478976, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.12.mlp": [ + { + "accuracy": 0.9876395385516318, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 113182032, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878772599132437, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117376336, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892491776692239, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 131087360, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896669795638636, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 147012608, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937428337963004, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165434336, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942279614900288, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 169691136, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948479133216959, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182412768, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967467467251577, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 208362720, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997046811800254, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 211437568, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967858907031385, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 214914016, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971826560795307, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 219170816, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983359438024069, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 264393696, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998565597165572, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 268650496, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990755885251259, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 305746912, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991167492576336, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 317592064, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992300810194329, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 345903616, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996654498881024, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 404623872, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.12.self_attn": [ + { + "accuracy": 0.9861513407606828, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 22550528, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9867421906245383, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 23336960, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892480334168986, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 24220928, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918656553092756, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 28283392, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933008951576132, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33360896, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934441972719995, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33394432, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967391498778996, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42535936, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968425435455222, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42569472, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970520987714592, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 42933760, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971255230668344, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 43528192, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969356412950315, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 43880192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971404638337461, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 44228096, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976574680522868, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 45094912, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978002680367545, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 45608960, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987198246740981, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 55249920, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989074538216779, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 56094720, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991193566667406, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 63507456, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994298540251819, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 66596864, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999686071159024, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 84478976, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.13.mlp": [ + { + "accuracy": 0.985698496040545, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 113182032, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9860292986819619, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117376336, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9877572702734094, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 131087360, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883003744639849, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 147012608, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928323940226906, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165434336, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934205522662715, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 169691136, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942022722802664, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182412768, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962426824005026, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 208362720, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966021948739102, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 211437568, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962952603635035, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 214914016, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967633593631419, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 219170816, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980873292998264, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 264393696, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983455454440493, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 268650496, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989322446482746, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 305746912, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989719617327577, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 317592064, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991133727721477, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 345903616, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996164931033394, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 404623872, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.13.self_attn": [ + { + "accuracy": 0.984989301154488, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 22550528, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9854121208190918, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 23336960, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9880441317432805, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 24220928, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909858727141431, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 28283392, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.992621206923535, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33360896, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928155008115267, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33394432, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964216441700333, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42535936, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965287592065962, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42569472, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996801841415857, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 42933760, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969649167829439, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 43528192, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967114944991312, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 43880192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969998441244426, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 44228096, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974894972616121, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 45094912, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976700883554784, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 45608960, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985949339247063, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 55249920, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988216173491979, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 56094720, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990669696738845, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 63507456, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993879116679493, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 66596864, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999667373927016, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 84478976, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.14.mlp": [ + { + "accuracy": 0.9831860410539728, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 113182032, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9835648599423861, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117376336, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9856788500359184, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 131087360, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9863363473038924, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 147012608, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916918348324927, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165434336, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922872530786615, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 169691136, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932111344839397, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182412768, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.99566894730455, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 208362720, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960370538266081, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 211437568, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957360617424312, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 214914016, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962440635028639, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 219170816, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977958884678388, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 264393696, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998103205310671, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 268650496, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987690091917389, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 305746912, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988279739688886, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 317592064, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989974545805078, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 345903616, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995624522531503, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 404623872, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.14.self_attn": [ + { + "accuracy": 0.9873934474430586, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 22550528, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9880638608807012, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 23336960, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900555108722887, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 24220928, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929217523650119, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 28283392, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939721046309722, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33360896, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941741632003533, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33394432, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967928644465772, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42535936, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996936715158977, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42569472, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971886349743918, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 42933760, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973042436728352, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 43528192, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971347239456678, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 43880192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973364303770819, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 44228096, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976772795382299, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 45094912, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997884885849137, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 45608960, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987164897550094, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 55249920, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989159733645225, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 56094720, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991265203019506, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 63507456, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994288033952838, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 66596864, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996811402716527, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 84478976, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.15.mlp": [ + { + "accuracy": 0.9731346306047941, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 113182032, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9735715985298157, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117376336, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.979283299885298, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 131087360, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9812150315234536, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 147012608, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9865743910011492, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165434336, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.987661683245709, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 169691136, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901565705475054, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182412768, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928667749229231, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 208362720, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936601947012701, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 211437568, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930848369472906, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 214914016, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993959062977841, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 219170816, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964194631105975, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 264393696, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969274303630778, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 268650496, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980059691557759, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 305746912, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981034545129851, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 317592064, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986177356423516, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 345903616, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993022513624868, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 404623872, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.15.self_attn": [ + { + "accuracy": 0.9844099989062861, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 22550528, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9846681701509576, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 23336960, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878117175478685, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 24220928, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915275581573185, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 28283392, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929190027086359, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33360896, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931305325344989, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33394432, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961705235274214, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42535936, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962343534356669, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42569472, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965843821042463, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 42933760, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968232200726083, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 43528192, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965496674964303, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 43880192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969165599659869, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 44228096, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972061726607775, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 45094912, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974227473139763, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 45608960, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983966900525909, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 55249920, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986616726964712, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 56094720, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989414423900215, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 63507456, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992651286485948, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 66596864, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996025460085979, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 84478976, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.2.mlp": [ + { + "accuracy": 0.9918053479571092, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 113182032, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921058581063622, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117376336, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933787043157377, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 131087360, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993770107626915, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 147012608, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960289107341516, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165434336, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996350919729785, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 169691136, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968917507090067, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182412768, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997925220743606, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 208362720, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981185958573693, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 211437568, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979728138760516, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 214914016, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998219801995315, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 219170816, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998931908783944, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 264393696, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999080074362849, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 268650496, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999385247840301, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 305746912, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993896160372778, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 317592064, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994920138759833, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 345903616, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997457153359918, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 404623872, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.2.self_attn": [ + { + "accuracy": 0.9881857241454878, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 22550528, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9884721668143022, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 23336960, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900692889564916, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 24220928, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934994468563482, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 28283392, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943238590892992, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33360896, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943760284467748, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33394432, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970322210145625, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42535936, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971315135297022, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42569472, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973514697661525, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 42933760, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975059193215872, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 43528192, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997243655747489, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 43880192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974358720999015, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 44228096, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977291881253845, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 45094912, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979081047992957, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 45608960, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987494040672716, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 55249920, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998924482417734, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 56094720, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992070893119824, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 63507456, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994214848664246, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 66596864, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997015731213125, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 84478976, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.3.mlp": [ + { + "accuracy": 0.9919965573047337, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 113182032, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922738381122288, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117376336, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937740999617075, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 131087360, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942934681710444, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 147012608, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960275013980112, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165434336, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963867719236174, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 169691136, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970810225135401, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182412768, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978777542710304, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 208362720, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981117772036477, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 211437568, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979468397796154, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 214914016, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982060380280018, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 219170816, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989065343612119, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 264393696, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990552206964869, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 268650496, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993510130596789, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 305746912, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999382808853529, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 317592064, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999513588502611, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 345903616, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997160007049771, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 404623872, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.3.self_attn": [ + { + "accuracy": 0.9866033714068564, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 22550528, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9870172895883259, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 23336960, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9890515012176413, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 24220928, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926948359138087, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 28283392, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934077553058925, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33360896, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934605371795202, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33394432, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965505407829034, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42535936, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966406881024963, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42569472, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969196821513929, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 42933760, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997067348149262, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 43528192, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967934595126855, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 43880192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970216515817141, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 44228096, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974449876891939, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 45094912, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976418420280281, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 45608960, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985969780307067, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 55249920, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988023285803042, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 56094720, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990887951694036, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 63507456, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993771277368069, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 66596864, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996840753347466, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 84478976, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.4.mlp": [ + { + "accuracy": 0.9933463115441171, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 113182032, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934975697021735, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117376336, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945173412561417, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 131087360, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994814516290238, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 147012608, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966447427868843, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165434336, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969284983449861, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 169691136, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973658356619509, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182412768, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982434707252603, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 208362720, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984147000665727, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 211437568, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982598918048959, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 214914016, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984886118848073, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 219170816, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990805635522855, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 264393696, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992068813329464, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 268650496, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994620830800972, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 305746912, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994833342927066, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 317592064, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995659504968085, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 345903616, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997701005225903, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 404623872, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.4.self_attn": [ + { + "accuracy": 0.9884011016080254, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 22550528, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9885907079044142, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 23336960, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903149706752676, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 24220928, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935770611229696, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 28283392, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941203056981689, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33360896, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942229162705573, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33394432, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969146290892049, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42535936, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970106621714014, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42569472, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972384721040726, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 42933760, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973915825156789, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 43528192, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971288132824396, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 43880192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997347591346816, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 44228096, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976854275323843, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 45094912, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978662842982694, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 45608960, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987167629756426, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 55249920, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989050596364235, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 56094720, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991842737715495, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 63507456, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994237136683966, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 66596864, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997048363040545, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 84478976, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.5.mlp": [ + { + "accuracy": 0.9915347593395334, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 113182032, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917689216764349, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117376336, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.992873982379311, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 131087360, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993186363264134, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 147012608, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957568410195803, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165434336, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961063034440342, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 169691136, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965641184857017, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182412768, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977996529717195, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 208362720, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979943340938342, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 211437568, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978206755691453, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 214914016, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980892491968054, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 219170816, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988579881426535, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 264393696, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999017197736784, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 268650496, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993521838792061, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 305746912, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993766641832496, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 317592064, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994665265181347, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 345903616, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997453505026275, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 404623872, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.5.self_attn": [ + { + "accuracy": 0.987247317245132, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 22550528, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.987579439815722, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 23336960, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898829609155655, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 24220928, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930984636670664, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 28283392, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936989788946352, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33360896, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937560938690838, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33394432, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967279696935102, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42535936, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968369360032835, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42569472, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970906716036169, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 42933760, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972196625251519, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 43528192, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969233478370466, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 43880192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971611488022303, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 44228096, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976028098087562, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 45094912, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977969721351799, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 45608960, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986811323385489, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 55249920, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988710567551223, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 56094720, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991225805133581, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 63507456, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994134463271812, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 66596864, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996846344714102, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 84478976, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.6.mlp": [ + { + "accuracy": 0.9911795854568481, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 113182032, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914371763405047, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117376336, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925044084850111, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 131087360, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928060958259984, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 147012608, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955970139095658, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165434336, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959403982287959, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 169691136, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996389060428268, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182412768, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977067983464191, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 208362720, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979121298774293, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 211437568, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977346815560993, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 214914016, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980098116946848, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 219170816, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988134775898958, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 264393696, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989789483186445, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 268650496, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993320564788423, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 305746912, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993562712579181, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 317592064, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994428074477535, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 345903616, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997439745715574, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 404623872, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.6.self_attn": [ + { + "accuracy": 0.988287154781191, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 22550528, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9885790159827784, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 23336960, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908215466298556, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 24220928, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937442980314556, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 28283392, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940572153580817, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33360896, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941818541602084, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33394432, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996895942640932, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42535936, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970104490455828, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42569472, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971983501393544, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 42933760, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973073576233888, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 43528192, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971184783468121, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 43880192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972814745024631, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 44228096, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977938054423583, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 45094912, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979829907809433, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 45608960, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987848420676432, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 55249920, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989633221963519, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 56094720, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999164496303389, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 63507456, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999466055219895, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 66596864, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997027262947277, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 84478976, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.7.mlp": [ + { + "accuracy": 0.9909282279642004, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 113182032, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911570392156902, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117376336, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922867895741212, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 131087360, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.99261274933815, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 147012608, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954373683584364, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165434336, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958007327820125, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 169691136, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962808968205201, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182412768, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976236263388082, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 208362720, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978377697499174, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 211437568, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976431680353064, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 214914016, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979350386481536, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 219170816, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987722392144956, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 264393696, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989425085092846, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 268650496, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993074516226587, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 305746912, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993351074915967, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 317592064, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.99942810024674, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 345903616, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997363485591976, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 404623872, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.7.self_attn": [ + { + "accuracy": 0.9866042294000325, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 22550528, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9872272477338189, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 23336960, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9902456559632954, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 24220928, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928519937552904, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 28283392, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933726709139975, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33360896, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993569099589398, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33394432, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965874007657954, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42535936, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967115105766999, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42569472, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969297349452972, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 42933760, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970433470842085, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 43528192, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996780096504249, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 43880192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970294366541662, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 44228096, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997711168504075, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 45094912, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978880764622438, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 45608960, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987420810288504, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 55249920, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989250263101176, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 56094720, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991106217432963, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 63507456, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994611534240999, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 66596864, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996896656965347, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 84478976, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.8.mlp": [ + { + "accuracy": 0.9903661506740671, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 113182032, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906011953165657, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117376336, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918992848772752, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 131087360, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922605243168379, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 147012608, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951185521326567, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165434336, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955461366396201, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 169691136, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960828705837852, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182412768, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974885877025755, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 208362720, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977090231289989, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 211437568, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974858464771196, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 214914016, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978076899914365, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 219170816, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986810893996766, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 264393696, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988731522309152, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 268650496, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999260405666734, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 305746912, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992860439968737, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 317592064, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993898852385188, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 345903616, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997193614315045, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 404623872, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.8.self_attn": [ + { + "accuracy": 0.987259893040908, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 22550528, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9874859609101948, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 23336960, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901909020386244, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 24220928, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928204534869445, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 28283392, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936556341616731, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33360896, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993838625911035, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33394432, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967614545633918, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42535936, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996863644962248, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42569472, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971331316781672, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 42933760, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972502536287433, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 43528192, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969553451396918, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 43880192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997194979339838, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 44228096, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977171491635474, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 45094912, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979279656943522, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 45608960, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987537540299328, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 55249920, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989364245220235, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 56094720, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991417555628639, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 63507456, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994587846296398, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 66596864, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996983472404903, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 84478976, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.9.mlp": [ + { + "accuracy": 0.9900100513508445, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 113182032, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9902033256857019, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 117376336, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914813363238385, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 131087360, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918456540295952, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 147012608, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994955557349481, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165434336, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953753587446714, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 169691136, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959150525300127, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 182412768, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973801345025238, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 208362720, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976217035008105, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 211437568, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973932151731691, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 214914016, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977235241155875, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 219170816, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998644431954936, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 264393696, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988353375933672, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 268650496, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992381191665405, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 305746912, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992672635340377, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 317592064, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993735669965023, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 345903616, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997127137735093, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 404623872, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.9.self_attn": [ + { + "accuracy": 0.9848764036831102, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 22550528, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9852782738836188, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 23336960, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9885798849557575, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 24220928, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915013627002114, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 28283392, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927309508386412, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33360896, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928996759025674, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 33394432, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962547499882547, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42535936, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964036486650768, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 42569472, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966387744796904, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 42933760, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996782305601396, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 43528192, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964977722418936, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 43880192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996744444887889, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 44228096, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997415350063851, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 45094912, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997589651887354, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 45608960, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985546334401557, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 55249920, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987812990224675, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 56094720, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990017208221712, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 63507456, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993906083859896, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 66596864, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996615474749553, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 84478976, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.norm.norm": null + } +} \ No newline at end of file