Qwen3-8B-q4f16_1-MLC / ndarray-cache.json
riczhou's picture
Upload folder using huggingface_hub
d5bedef verified
{
"metadata": {
"ParamSize": 437,
"ParamBytes": 4607731712.0,
"BitsPerParam": 4.500432754306446
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 311164928,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
151936,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 311164928,
"byteOffset": 0
}
],
"md5sum": "c3bc1aea1f53d9ea9db272d0abcfae1d"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 38895616,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
151936,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 38895616,
"byteOffset": 0
}
],
"md5sum": "9ee15a143596b7345876bc2bee09b72a"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 311164928,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
151936,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 311164928,
"byteOffset": 0
}
],
"md5sum": "076a502a77cdadc1eae16f8e112205c8"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 38895616,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
151936,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 38895616,
"byteOffset": 0
}
],
"md5sum": "86d5131785223e941ba42c882bbab1cb"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "5838bf7ab6e9ed6a16584e03fdb8721e"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 28319744,
"records": [
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 8192
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 25174016
}
],
"md5sum": "2b5e96567c124e7323eeef0a21b4f607"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a922a5c1a293dc1ae02fbc38fc2a5869"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "0ce47f5d210356e7205f7739422dc988"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 33047040,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 6291456
},
{
"name": "model.layers.0.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 6299648
},
{
"name": "model.layers.0.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 6299904
},
{
"name": "model.layers.0.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 18882816
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 20455680
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 28844288
},
{
"name": "model.layers.0.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 29892864
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29893120
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29901312
}
],
"md5sum": "d42f4826f97c27eec29700b74deed4c5"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "9e294f36c236d829a57ee7bac425a7a4"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "27d2f7f9b1d1f7e441ec9fce0684fbef"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 33047040,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 6291456
},
{
"name": "model.layers.1.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 6299648
},
{
"name": "model.layers.1.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 6299904
},
{
"name": "model.layers.1.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 18882816
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 20455680
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 28844288
},
{
"name": "model.layers.1.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 29892864
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29893120
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29901312
}
],
"md5sum": "ec95497f44ffbc2788701d0a73c472cc"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "cc479c23a899028f5fe693c490621653"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "87d6b589a23953a8024bc6fde342b1e2"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 33047040,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 6291456
},
{
"name": "model.layers.2.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 6299648
},
{
"name": "model.layers.2.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 6299904
},
{
"name": "model.layers.2.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 18882816
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 20455680
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 28844288
},
{
"name": "model.layers.2.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 29892864
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29893120
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29901312
}
],
"md5sum": "8c452d723ec41a79d04840a43dbbfd98"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "15d1761f9925826c3b70d895d8bf0a33"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "4164db0371753ac5d2029b411816d2fa"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 33047040,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 6291456
},
{
"name": "model.layers.3.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 6299648
},
{
"name": "model.layers.3.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 6299904
},
{
"name": "model.layers.3.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 18882816
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 20455680
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 28844288
},
{
"name": "model.layers.3.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 29892864
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29893120
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29901312
}
],
"md5sum": "3934a7fcc6772ab3a6e1d52ed94cda24"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a38adb796726b2d46286724300a644cb"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "1852ca0200f90894f1d0d1f99745bee7"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 33047040,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 6291456
},
{
"name": "model.layers.4.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 6299648
},
{
"name": "model.layers.4.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 6299904
},
{
"name": "model.layers.4.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 18882816
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 20455680
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 28844288
},
{
"name": "model.layers.4.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 29892864
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29893120
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29901312
}
],
"md5sum": "0fc4c10051a354dd118ce1740777c018"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c8f890459799b2c1e9c7cd68560887bf"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "4cc53980179096243df5249ebeb4b4f7"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 33047040,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 6291456
},
{
"name": "model.layers.5.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 6299648
},
{
"name": "model.layers.5.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 6299904
},
{
"name": "model.layers.5.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 18882816
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 20455680
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 28844288
},
{
"name": "model.layers.5.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 29892864
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29893120
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29901312
}
],
"md5sum": "80f6a936aa2b2ba1f53ae09da44f72ec"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 29893120,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 6291456
},
{
"name": "model.layers.6.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 6299648
},
{
"name": "model.layers.6.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 6299904
},
{
"name": "model.layers.6.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 18882816
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 20455680
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 28844288
},
{
"name": "model.layers.6.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 29892864
}
],
"md5sum": "cf7fa638fe7c0dfe3020a4644c4c8715"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "9b259066875da3269cece212a5889c67"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "f195961e13522c7f251f32cb8b7a4c5c"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 23609600,
"records": [
{
"name": "model.layers.7.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 14155776
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14163968
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 17309696
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23601152
},
{
"name": "model.layers.10.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 23609344
}
],
"md5sum": "2595f256214e7d1c5f95c9bc6aa0f186"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "232b48592763afc18d40f776cb9df516"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "6550eb4da3b0bcc06c9e936f11d51059"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 33047040,
"records": [
{
"name": "model.layers.10.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.10.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 23592960
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23593216
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 23601408
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 26747136
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 33038592
},
{
"name": "model.layers.11.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 33046784
}
],
"md5sum": "d43b6ed8093ee93f11883cedb4e6f78c"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "60eef1138e2526290ff1a8d02242c28c"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "5fb13f7087a627cdd2a00d170674d3db"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 33047040,
"records": [
{
"name": "model.layers.11.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.11.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 23592960
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23593216
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 23601408
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 26747136
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 33038592
},
{
"name": "model.layers.12.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 33046784
}
],
"md5sum": "144a939e90b47e298736abdd893e010d"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "8a1d7454519ed08d799019b22831a856"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "71bd8f509e34840328543293e62cba4a"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 33047040,
"records": [
{
"name": "model.layers.12.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.12.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 23592960
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23593216
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 23601408
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 26747136
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 33038592
},
{
"name": "model.layers.13.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 33046784
}
],
"md5sum": "a727272de8eeb25389b18f6dee625bca"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "e859e5508a41e0316cdd167360aaf66b"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "5f3fb4fb416feab50dfd83f622ae3e70"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 33047040,
"records": [
{
"name": "model.layers.13.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.13.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 23592960
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23593216
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 23601408
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 26747136
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 33038592
},
{
"name": "model.layers.14.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 33046784
}
],
"md5sum": "84726b53314b0a6672fecbc6205199fd"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "87db13f96a60efe496570ab694cca1f3"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "35b41299aca23e8038015f1824a58433"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 33047040,
"records": [
{
"name": "model.layers.14.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.14.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 23592960
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23593216
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 23601408
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 26747136
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 33038592
},
{
"name": "model.layers.15.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 33046784
}
],
"md5sum": "e58ad003d17c7d189b58ec1bffb49517"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "690486d92847a8267bb7b66329c4b195"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "3a437086fb333013296dcd51f727c757"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 33047040,
"records": [
{
"name": "model.layers.15.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.15.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 23592960
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23593216
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 23601408
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 26747136
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 33038592
},
{
"name": "model.layers.16.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 33046784
}
],
"md5sum": "5d76de0804c41f7a3a14d545efe72226"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "bb325c8d8a2e70125370fbeab5fac9e4"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 29884928,
"records": [
{
"name": "model.layers.16.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.16.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 23592960
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 23593216
},
{
"name": "model.layers.17.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 29884672
}
],
"md5sum": "4f4aa0088d7bb5f6b49b00992dac4fbd"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d61de2756806cceff594bd65f12f37d0"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "8509678e48a0382760afcfa93b14b3e7"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 33047040,
"records": [
{
"name": "model.layers.17.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.17.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 23592960
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23593216
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 23601408
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 26747136
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 33038592
},
{
"name": "model.layers.7.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 33046784
}
],
"md5sum": "c3c7220d781d7b760389ca126fe01fdc"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "03afc6524a3caf1a38c2a9b36e3f39cd"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "520a11f4f94b9f437680f30866755af3"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 33047040,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.7.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 9437184
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437440
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 9445632
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12591360
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 18882816
},
{
"name": "model.layers.8.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 18891008
},
{
"name": "model.layers.8.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 18891264
},
{
"name": "model.layers.8.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 31474176
}
],
"md5sum": "0937b2c004729490db9c2993000f7cc1"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "7d5d4d1a8a67abe44253b175ecbf833a"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "03b373f66ee728cc1d765227c37e167f"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 33047040,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.8.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 9437184
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437440
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 9445632
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12591360
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 18882816
},
{
"name": "model.layers.9.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 18891008
},
{
"name": "model.layers.9.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 18891264
},
{
"name": "model.layers.9.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 31474176
}
],
"md5sum": "da764df48928346e142a47c758bf7fc0"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "60bb2251ebef392d059f105b082fe3f7"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "1654a295a958bc57abf78877eae80a16"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "2483d707fc663b2c6efdf3fe6bcbe412"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 22053376,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.9.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 9437184
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437440
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 9445632
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 12591360
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 12599552
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 12607744
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 15753472
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22044928
},
{
"name": "model.layers.18.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 22053120
}
],
"md5sum": "495c8b84bcc15d85af071243cf4c862a"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b045e0435378155baaa05bc8b5717ffe"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "066b924a843fbc14b48a132a9a866540"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 33047040,
"records": [
{
"name": "model.layers.18.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.18.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 23592960
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23593216
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 23601408
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 26747136
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 33038592
},
{
"name": "model.layers.19.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 33046784
}
],
"md5sum": "72d8121afa2c57c5ef3aa3edc74dafff"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a5e049b3127a7cc298237571d5273013"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "10590b39899bb63c94068abcbfd9c012"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 33047040,
"records": [
{
"name": "model.layers.19.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.19.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 23592960
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23593216
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 23601408
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 26747136
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 33038592
},
{
"name": "model.layers.20.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 33046784
}
],
"md5sum": "9207e46163374ef8cf2091d5f026b5ac"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "6578dd0df44cbbd6b0b61fa0e25b8e90"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "9f4a2ae39dcfbd4bb9bf5d883ecc2d76"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 33047040,
"records": [
{
"name": "model.layers.20.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.20.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 23592960
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23593216
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 23601408
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 26747136
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 33038592
},
{
"name": "model.layers.21.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 33046784
}
],
"md5sum": "1c9abd7f59328dd19a288b47646ce3dc"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "46168878291feeafc6a235230a01ed19"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "781db4892c220e5391a9d3cbdec878bf"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 33047040,
"records": [
{
"name": "model.layers.21.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.21.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 23592960
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23593216
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 23601408
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 26747136
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 33038592
},
{
"name": "model.layers.22.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 33046784
}
],
"md5sum": "6df99e76de4e5fbe0b91829cd923308c"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "4215249eeb4ac7816e17711435389ccd"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "8821a8bde06076bf2fa97eeb1fdd971d"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 33047040,
"records": [
{
"name": "model.layers.22.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.22.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 23592960
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23593216
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 23601408
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 26747136
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 33038592
},
{
"name": "model.layers.23.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 33046784
}
],
"md5sum": "78435293731df0072927ca5c59c755dd"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "7bb639070ed05ee7d3ab491c74484426"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "867d2a064d00eb289b9dafcab9140428"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 33047040,
"records": [
{
"name": "model.layers.23.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.23.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 23592960
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23593216
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 23601408
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 26747136
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 33038592
},
{
"name": "model.layers.24.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 33046784
}
],
"md5sum": "6916b0cf9523408649e3645b7364fd3a"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "01b17dea593a7b7f402bb442bedc841d"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "bec2739150969f2a8410d9b2f362de83"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 33047040,
"records": [
{
"name": "model.layers.24.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.24.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 23592960
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23593216
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 23601408
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 26747136
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 33038592
},
{
"name": "model.layers.25.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 33046784
}
],
"md5sum": "53d4ce074d1604954972f371b390794f"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "1acb37c3022b3cd11607a712047f984f"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "48f4cdcd225033a255a6c033d83df859"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 33047040,
"records": [
{
"name": "model.layers.25.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.25.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 23592960
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23593216
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 23601408
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 26747136
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 33038592
},
{
"name": "model.layers.26.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 33046784
}
],
"md5sum": "6d9de9456d2c477b0ca82ea9cb682871"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "1d4b9c107af1a837cac12e725e266b49"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 29884928,
"records": [
{
"name": "model.layers.26.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.26.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 23592960
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 23593216
},
{
"name": "model.layers.27.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 29884672
}
],
"md5sum": "9ec7df91105bdf222a424171fd8ff1ff"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "60169bfdb03d69e788614e16ab210243"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b1d7eebee793886e2ec140e240012490"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "216100f7da07cd19d5c7ec707d07f160"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 29909248,
"records": [
{
"name": "model.layers.27.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.27.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 23592960
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23593216
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 23601408
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 26747136
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 26755328
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 26763520
}
],
"md5sum": "a815d755631c158a2db59df9d1fc8b70"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "4e84d06e58ca6a9a7144d4e088e4fd9f"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "03516ec829271735ae80ae23ed74e8e2"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 33047040,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 6291456
},
{
"name": "model.layers.28.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 6299648
},
{
"name": "model.layers.28.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 6299904
},
{
"name": "model.layers.28.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 18882816
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 20455680
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 28844288
},
{
"name": "model.layers.28.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 29892864
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29893120
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29901312
}
],
"md5sum": "3345c7cf852de51d09af4653e6da902a"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "19fd6b90f4ab053efaf468b7abbe58eb"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "47416f13da82c52a60eb7df5ec121954"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 33047040,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 6291456
},
{
"name": "model.layers.29.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 6299648
},
{
"name": "model.layers.29.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 6299904
},
{
"name": "model.layers.29.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 18882816
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 20455680
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 28844288
},
{
"name": "model.layers.29.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 29892864
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29893120
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29901312
}
],
"md5sum": "42b22e40910ed976935ea9212b2ffbe1"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b34e982ca0eefa1edb705b8ee189c830"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "9e873070d99c20e5e20f83f390adcdcf"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 33047040,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 6291456
},
{
"name": "model.layers.30.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 6299648
},
{
"name": "model.layers.30.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 6299904
},
{
"name": "model.layers.30.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 18882816
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 20455680
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 28844288
},
{
"name": "model.layers.30.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 29892864
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29893120
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29901312
}
],
"md5sum": "c5dd6e7bb9b36d311367c4386350f996"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "bb8944db514110d3e5043a63a556ace9"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "f11587c06154dcd00501f3c80f0ee780"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 33047040,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 6291456
},
{
"name": "model.layers.31.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 6299648
},
{
"name": "model.layers.31.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 6299904
},
{
"name": "model.layers.31.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 18882816
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 20455680
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 28844288
},
{
"name": "model.layers.31.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 29892864
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29893120
},
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29901312
}
],
"md5sum": "3b927487425677c9f8c0d353605d3cd4"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d0b53b21d78956871fef2d7f9db40362"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "66b5d0428955d0a9f95277f1996fc279"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 33047040,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 6291456
},
{
"name": "model.layers.32.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 6299648
},
{
"name": "model.layers.32.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 6299904
},
{
"name": "model.layers.32.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 18882816
},
{
"name": "model.layers.32.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 20455680
},
{
"name": "model.layers.32.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 28844288
},
{
"name": "model.layers.32.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 29892864
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29893120
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29901312
}
],
"md5sum": "dcf2d27e8de9c37c3b7dd60cb6472cf4"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "abfdc3a77eeafa7c61b564852fc193f8"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "d437d45e7956d1fae9f7a224fe4f8dc2"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 33047040,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 6291456
},
{
"name": "model.layers.33.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 6299648
},
{
"name": "model.layers.33.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 6299904
},
{
"name": "model.layers.33.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 18882816
},
{
"name": "model.layers.33.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 20455680
},
{
"name": "model.layers.33.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 28844288
},
{
"name": "model.layers.33.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 29892864
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29893120
},
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29901312
}
],
"md5sum": "c247be8eb0d7698a590ba63cd93d7da6"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
4096,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f8beae1ce1e9b08159fc6817c8a2616a"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_weight",
"shape": [
24576,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "1ec222de69cf7db8b152d1954b040daa"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 33047040,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 6291456
},
{
"name": "model.layers.34.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 6299648
},
{
"name": "model.layers.34.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 6299904
},
{
"name": "model.layers.34.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 18882816
},
{
"name": "model.layers.34.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 20455680
},
{
"name": "model.layers.34.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 28844288
},
{
"name": "model.layers.34.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 29892864
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29893120
},
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
4096,
384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29901312
}
],
"md5sum": "7d43bb4ad4b631b2b9a5190b682fc1bf"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 29901312,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_scale",
"shape": [
24576,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 6291456
},
{
"name": "model.layers.35.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 6299648
},
{
"name": "model.layers.35.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 6299904
},
{
"name": "model.layers.35.self_attn.c_attn.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 18882816
},
{
"name": "model.layers.35.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 20455680
},
{
"name": "model.layers.35.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 28844288
},
{
"name": "model.layers.35.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 29892864
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29893120
}
],
"md5sum": "fdd36f67f1d14b73238704f19d212a92"
}
]
}