illian01's picture
Add weights and config
808a262
{
"metadata": {
"ParamSize": 175,
"ParamBytes": 2082955264.0,
"BitsPerParam": 4.50044525764654
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 65536000,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32000,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536000,
"byteOffset": 0
}
],
"md5sum": "0c80f07c0e705a2e2c1cd1d241a4b609"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 33357824,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
32000,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192000,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
512,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 8192000
}
],
"md5sum": "fa05756d3a4aed82c316579089540aad"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
512,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "19060424c69b3c54f779bbd7392e9be6"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
1376,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "4b814bbb00bc27d9a9e3850331579676"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
512,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "fcde8719ca33311adb9e209901a7a1e1"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
128,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
512,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
128,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
128,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 12582912
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
344,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 18219008
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21037056
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
128,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
512,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "7cce174cd57c88824eb70a4ef149c88c"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
512,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "97b234afce902138e94878f8ef1a629c"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
512,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a736145d80831f744644da17850b1bb0"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 32063488,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
128,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
128,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 1048576
},
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
1376,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 6684672
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
344,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 29229056
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32047104
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32055296
}
],
"md5sum": "0b4f5bc34791a1e54dea410ec4c052b6"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
512,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "4db94cff0e44289f9eb44649315c1943"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
1376,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "7c5a83bd1092af3833edef1a9adaa42d"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
512,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "98699a3c8ec694dbf09a9381dc979df4"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
128,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
512,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
128,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
128,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 12582912
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
344,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 18219008
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21037056
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
128,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
512,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "b5eadcd590ed9992bf56fa27361040a6"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
512,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "c0913953918e9ff10b393a2350c541a8"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
512,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "1104e4e7bed02d7e43586f5dcb57fbc8"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 32063488,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
128,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
128,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 1048576
},
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
1376,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 6684672
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
344,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 29229056
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32047104
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32055296
}
],
"md5sum": "b99070c18ef1502bf8efbd1f9c680bbd"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
512,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "1e5a41fbee06aa0c682d28fe5f2a7f8e"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
1376,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "fd8b195c0fe64cc679731e416fcbda52"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
512,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c8a5c784a179b2d26121e20ef4518e52"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
128,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
512,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
128,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
128,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 12582912
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
344,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 18219008
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21037056
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
128,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
512,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "48d58ccb88b0d6d82622304daffd17cc"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
512,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "7bb64fee1fb04ebad5911123414e7fb5"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
512,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "79d7bba862e0627a3ce87c78fd0dde4b"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 32063488,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
128,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
128,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 1048576
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
1376,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 6684672
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
344,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 29229056
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32047104
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32055296
}
],
"md5sum": "71dddb4c8aca779927e9085c74a4bcf1"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
512,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "d5e0d9fc20163dcc5ab77757afbd0acf"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
1376,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "522a3e1535cf5940f47ca1132e4d94f1"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
512,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "5625f757d660c2b5f6894c0a452b4f9e"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
128,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
512,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
128,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
128,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 12582912
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
344,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 18219008
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21037056
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
128,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
512,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "c123b8433a0d596eea37c76bffd74c6d"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
512,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "805e55a7b4e85081f2e8dd60ef166d17"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
512,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "dae988315b9cecd5c082a601ba10aaee"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 32063488,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
128,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
128,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 1048576
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
1376,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 6684672
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
344,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 29229056
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32047104
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32055296
}
],
"md5sum": "36cee653735ee2a05805c04ea3dcc1f7"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
512,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "49bce51187a8083c60a9538181193a97"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
1376,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "6d6079646311cfbd581ad2ae09c512d7"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
512,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "80c576a52474d7eb2735769b84b3da0a"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
128,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
512,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
128,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
128,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 12582912
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
344,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 18219008
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21037056
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
128,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
512,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "dce80d098b2b47b0a422ca585a8d857a"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
512,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "809ef224699ace8b7b71b36566a58eb3"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
512,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "627d3f5e0b7bc2972a13aef95bee1bc5"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 32063488,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
128,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
128,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 1048576
},
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
1376,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 6684672
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
344,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 29229056
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32047104
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32055296
}
],
"md5sum": "cdd944ef3abc535c16ab5baebf5515b6"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
512,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "84a0d8df7b05f28a5fbf683152ccb758"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
1376,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "d302a8927ab13bdb23b5dfc577c6b915"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
512,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "e5a69957aac71da78feb9b23c5fb5a3a"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
128,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
512,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
128,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
128,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 12582912
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
344,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 18219008
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21037056
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
128,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
512,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "1ad6af1d7d02cc573ff6ae8a31ae7d64"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
512,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "36f3a1edafceb384df49e23b68a1623f"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
512,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "41c10ea6f4183a339c9495544a5073a4"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 32063488,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
128,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
128,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 1048576
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
1376,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 6684672
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
344,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 29229056
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32047104
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32055296
}
],
"md5sum": "11e2349a5804e96ab4b016beb9216906"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
512,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "cf198550156434640f526fec3638c905"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
1376,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "58342ee279d6a4a8b2f657b36f27bb46"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
512,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "0c56ff5a360bcebaf82be0a014147c52"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
128,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
512,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
128,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
128,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 12582912
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
344,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 18219008
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21037056
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
128,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
512,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "01ee8ad6a934879f6d1ec61034ee59ae"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
512,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "665554a4a88ea6721472b55119f63fe6"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
512,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "593150566899d0e627799200b84eb067"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 32063488,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
128,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
128,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 1048576
},
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
1376,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 6684672
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
344,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 29229056
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32047104
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32055296
}
],
"md5sum": "7ff1822429ca9857d43e880758c32030"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
512,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "2cc15cb7e3aacd3647fcabd5d8f17ef9"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
1376,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "e7339464db06df5d0bd690f80bf0dd41"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
512,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "10ff9c7c4343d42dd0d240d7c89fdb1a"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
128,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
512,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
128,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
128,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 12582912
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
344,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 18219008
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21037056
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
128,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
512,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "1269bece685abdb06e72d24b0d0320e3"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
512,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "76cfc2fe6a4f3cc78b7a06ac317db286"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
512,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "8d8bd945869313bf34f495460ff473c0"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 32063488,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
128,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
128,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 1048576
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
1376,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 6684672
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
344,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 29229056
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32047104
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32055296
}
],
"md5sum": "8968721bb0bca4d937a41d73a4464573"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
512,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "8b481258c607bec0b84d5dbef9c9228f"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
1376,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "199eaac5804cd946f7249fdd519d4251"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 65536000,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
512,
32000
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536000,
"byteOffset": 0
}
],
"md5sum": "d08e47868866d4246fcbea7a7c86a21b"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 29253632,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
128,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
512,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
128,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
128,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 12582912
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
344,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 18219008
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21037056
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21053440
},
{
"name": "lm_head.q_scale",
"shape": [
128,
32000
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192000,
"byteOffset": 21061632
}
],
"md5sum": "72e2e78ee16f32e8e8d93f88aeede5e7"
}
]
}