NicoNico's picture
Add files using upload-large-folder tool
8da533b verified
{
"measurement": {
"model.layers.0": {
"accuracy": 0.9522858953569084,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.1": {
"accuracy": 0.9113649652572349,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.2": {
"accuracy": 0.5754014626145363,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.3": {
"accuracy": 0.8346537635661662,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.4": {
"accuracy": 0.9821812690061051,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.5": {
"accuracy": 0.9811006280651782,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.6": {
"accuracy": 0.9804062172188424,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.7": {
"accuracy": 0.9799818968458567,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.8": {
"accuracy": 0.9785304359684233,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.9": {
"accuracy": 0.9782906010514125,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.10": {
"accuracy": 0.9774000000325032,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.11": {
"accuracy": 0.9770228461129591,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.12": {
"accuracy": 0.9764093302073888,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.13": {
"accuracy": 0.9736090413061902,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.14": {
"accuracy": 0.9728761716396548,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.15": {
"accuracy": 0.9717501879786141,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.16": {
"accuracy": 0.9719149245065637,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.17": {
"accuracy": 0.971333933644928,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.18": {
"accuracy": 0.9697856259881519,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.19": {
"accuracy": 0.9718488412327133,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.20": {
"accuracy": 0.9690313608734868,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.21": {
"accuracy": 0.9688104883534834,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.22": {
"accuracy": 0.9654940785258077,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.23": {
"accuracy": 0.9653943074517883,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.24": {
"accuracy": 0.9633830261882395,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.25": {
"accuracy": 0.9626653084997088,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.26": {
"accuracy": 0.9641250073327683,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.27": {
"accuracy": 0.9639517902396619,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.28": {
"accuracy": 0.965472717594821,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.29": {
"accuracy": 0.9657354074879549,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.30": {
"accuracy": 0.968929739901796,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.31": {
"accuracy": 0.9708427549339831,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.32": {
"accuracy": 0.9665285683004186,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.33": {
"accuracy": 0.966284410096705,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.34": {
"accuracy": 0.9638014331576414,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.35": {
"accuracy": 0.9620210637804121,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.36": {
"accuracy": 0.9612511987797916,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.37": {
"accuracy": 0.9527971904026344,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.38": {
"accuracy": 0.9518032884225249,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.39": {
"accuracy": 0.947145561571233,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.40": {
"accuracy": 0.9391240942059085,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.41": {
"accuracy": 0.9377131020883098,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.42": {
"accuracy": 0.9319567781640217,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.43": {
"accuracy": 0.9371241622138768,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.44": {
"accuracy": 0.9392979013500735,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.45": {
"accuracy": 0.9402275587199256,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.46": {
"accuracy": 0.9368646148359403,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
},
"model.layers.47": {
"accuracy": 0.9256984132807702,
"total_bits": 2541146112.0,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_gate": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
},
"moe_expert_down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4,
"scale_groups:": 32
}
}
}
}