JackBinary's picture
Add files using upload-large-folder tool
fe03a76 verified
{
"metadata": {
"ParamSize": 313,
"ParamBytes": 4760885248.0,
"BitsPerParam": 5.0011817065612245
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 272498688,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
152064,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 272498688,
"byteOffset": 0
}
],
"md5sum": "510653a4cc50f6fa6407408fe593d3f0"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 34062336,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
152064,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 34062336,
"byteOffset": 0
}
],
"md5sum": "3cf6e86315e40ca815ffbcdd6df900fe"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "415cf091377c59c8e066ce34cad87fc5"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "6ca48c5544b032d0e51df962fbb560d2"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "f1489373bfdc1844bc3c0d18b0b78871"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "05fc55cf9b8971f07fab5a240f75ddb2"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 32723968,
"records": [
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 0
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4243456,
"byteOffset": 7168
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8486912,
"byteOffset": 4250624
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 12737536
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 12744704
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 802816,
"byteOffset": 19167232
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 19970048
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4243456,
"byteOffset": 19977216
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8486912,
"byteOffset": 24220672
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 32707584
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 9216,
"byteOffset": 32714752
}
],
"md5sum": "2ca5c4bb1177c9b1b601ae07d92973e5"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "7ca14608dcf3446dc34939109daa5cb0"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "2e7b1becc450e3318fb795c227fd4b98"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.20.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1032192,
"byteOffset": 8257536
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 9289728
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 802816,
"byteOffset": 15712256
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 16515072
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4243456,
"byteOffset": 16522240
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8486912,
"byteOffset": 20765696
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 29252608
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 9216,
"byteOffset": 29259776
}
],
"md5sum": "88fecf91939b086849aac8d69d288bd3"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "d88db17bf0b44d67a8c0eb6b74890506"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "fe152e83bae5a84f3f260519624d7116"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.21.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1032192,
"byteOffset": 8257536
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 9289728
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 802816,
"byteOffset": 15712256
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 16515072
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4243456,
"byteOffset": 16522240
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8486912,
"byteOffset": 20765696
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 29252608
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 9216,
"byteOffset": 29259776
}
],
"md5sum": "4d50813ef1b0952b91ca4f5153db06fc"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "ce237b108acffa35790d4ef186b0affc"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "d6fd5565b48463982783dc2e94196652"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.22.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1032192,
"byteOffset": 8257536
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 9289728
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 802816,
"byteOffset": 15712256
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 16515072
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4243456,
"byteOffset": 16522240
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8486912,
"byteOffset": 20765696
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 29252608
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 9216,
"byteOffset": 29259776
}
],
"md5sum": "c1590f9d2c95d719539585074a86323b"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "6162af2fe80ff1b3a2aaf3be305fbac2"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "0e2dbd64bc347e41e1af74280cae59b5"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.23.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1032192,
"byteOffset": 8257536
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 9289728
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 802816,
"byteOffset": 15712256
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 16515072
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4243456,
"byteOffset": 16522240
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8486912,
"byteOffset": 20765696
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 29252608
},
{
"name": "model.layers.24.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 9216,
"byteOffset": 29259776
}
],
"md5sum": "0e811f1f813352fe695ede58706cca7f"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "041cb00d7bf64114470ff2211ca61066"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "ee111a82c057e59e1b9ba61c4db3aa29"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.24.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1032192,
"byteOffset": 8257536
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 9289728
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 802816,
"byteOffset": 15712256
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 16515072
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4243456,
"byteOffset": 16522240
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8486912,
"byteOffset": 20765696
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 29252608
},
{
"name": "model.layers.25.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 9216,
"byteOffset": 29259776
}
],
"md5sum": "a2518cd0d5375a1202fac391ce80ea4e"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "851e9565156393e859c8f3751b0635bf"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "38aff0272d9c31b14c76c66d5fcc7cc3"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.25.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1032192,
"byteOffset": 8257536
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 9289728
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 802816,
"byteOffset": 15712256
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 16515072
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4243456,
"byteOffset": 16522240
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8486912,
"byteOffset": 20765696
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 29252608
},
{
"name": "model.layers.26.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 9216,
"byteOffset": 29259776
}
],
"md5sum": "4387c4d5f45e85e719ee7624aa10924c"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "26e8e1560c817c06eaab4338fe93d88f"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "08c9fc32541dd05d0f78c21e6f8bbe8d"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.26.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1032192,
"byteOffset": 8257536
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 9289728
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 802816,
"byteOffset": 15712256
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 16515072
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4243456,
"byteOffset": 16522240
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8486912,
"byteOffset": 20765696
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 29252608
},
{
"name": "model.layers.27.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 9216,
"byteOffset": 29259776
}
],
"md5sum": "2e16329b4a94eba16207d5dc84273105"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 272498688,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
152064,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 272498688,
"byteOffset": 0
}
],
"md5sum": "89dfe76450c674a5125daf56c3acd498"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 34062336,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
152064,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 34062336,
"byteOffset": 0
}
],
"md5sum": "b9e90bf1a497621371e41b6369d0c2a0"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "5bbc337c80e44a500ed0ee7e753d7888"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "c60caab33946131753bee6793739ce0b"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 29276160,
"records": [
{
"name": "model.layers.27.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1032192,
"byteOffset": 8257536
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 9289728
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 802816,
"byteOffset": 15712256
},
{
"name": "model.norm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 16515072
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 16522240
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4243456,
"byteOffset": 16529408
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8486912,
"byteOffset": 20772864
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 29259776
},
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 9216,
"byteOffset": 29266944
}
],
"md5sum": "b925bedf2364dfeebbfdecaccea2ab69"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "09db6e5cfbf49c1f8648df42ab281d1d"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "4e5886bd1c79b624eb6f89644b3f798c"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.0.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1032192,
"byteOffset": 8257536
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 9289728
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 802816,
"byteOffset": 15712256
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 16515072
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4243456,
"byteOffset": 16522240
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8486912,
"byteOffset": 20765696
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 29252608
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 9216,
"byteOffset": 29259776
}
],
"md5sum": "581f78f4da3dcc5262ec256c997b021a"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "93a50f1194dfd17a3c626ac68a42693e"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "551910720bd4668ea20f344a5cc0a452"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.1.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1032192,
"byteOffset": 8257536
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 9289728
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 802816,
"byteOffset": 15712256
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 16515072
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4243456,
"byteOffset": 16522240
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8486912,
"byteOffset": 20765696
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 29252608
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 9216,
"byteOffset": 29259776
}
],
"md5sum": "10eb1099ba40343370ba992115ae7aed"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "5f7195531cb9b4af525d81aea8352de2"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "442792e8264594176fcb5d7049227680"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.10.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1032192,
"byteOffset": 8257536
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 9289728
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 802816,
"byteOffset": 15712256
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 16515072
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4243456,
"byteOffset": 16522240
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8486912,
"byteOffset": 20765696
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 29252608
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 9216,
"byteOffset": 29259776
}
],
"md5sum": "40711ac072a631528188d88f49ba9126"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "084dbce4a740aa3783a8d71eed58a15d"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "2450c7ee53558583f8ac3f458c439910"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.11.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1032192,
"byteOffset": 8257536
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 9289728
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 802816,
"byteOffset": 15712256
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 16515072
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4243456,
"byteOffset": 16522240
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8486912,
"byteOffset": 20765696
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 29252608
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 9216,
"byteOffset": 29259776
}
],
"md5sum": "d41bd9d7020f307c0cf39d2818179b78"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "955074dfff39b1a78f9859095214b3d7"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "1582f92630a0b33b6c0f22c6368b01a4"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.12.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1032192,
"byteOffset": 8257536
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 9289728
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 802816,
"byteOffset": 15712256
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 16515072
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4243456,
"byteOffset": 16522240
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8486912,
"byteOffset": 20765696
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 29252608
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 9216,
"byteOffset": 29259776
}
],
"md5sum": "013e2e4491feea9063ab544d3cf2ac30"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "40bf232984896547f2c9d5276617b7bc"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "27ad71c3d4ddf73dd864f1ee3b46b37d"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.13.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1032192,
"byteOffset": 8257536
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 9289728
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 802816,
"byteOffset": 15712256
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 16515072
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4243456,
"byteOffset": 16522240
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8486912,
"byteOffset": 20765696
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 29252608
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 9216,
"byteOffset": 29259776
}
],
"md5sum": "5d73c4b8eb3900f199da62715988357b"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "add9316d56e97d2ac71821d450b7d518"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "c965d6268e3575eba8b9db3fadafa54d"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.14.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1032192,
"byteOffset": 8257536
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 9289728
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 802816,
"byteOffset": 15712256
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 16515072
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4243456,
"byteOffset": 16522240
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8486912,
"byteOffset": 20765696
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 29252608
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 9216,
"byteOffset": 29259776
}
],
"md5sum": "c690844c463de56196cb918ea7c9cc64"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "c9735f02498340fd3c68e791a6a24b66"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "8cf3c5a718e44923f41c8356848c9b59"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.15.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1032192,
"byteOffset": 8257536
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 9289728
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 802816,
"byteOffset": 15712256
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 16515072
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4243456,
"byteOffset": 16522240
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8486912,
"byteOffset": 20765696
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 29252608
},
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 9216,
"byteOffset": 29259776
}
],
"md5sum": "2b86f58d51e5bd735a0e3bdebaf88816"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "2990ecf13b020f7d1ce6a57ae3bc7b2c"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "83e89f0247295e37d41adf8a54eb82c2"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.16.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1032192,
"byteOffset": 8257536
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 9289728
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 802816,
"byteOffset": 15712256
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 16515072
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4243456,
"byteOffset": 16522240
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8486912,
"byteOffset": 20765696
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 29252608
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 9216,
"byteOffset": 29259776
}
],
"md5sum": "8537883e2ebc5feb60a6247988a6e7e4"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "963cd49bf14ffbf7cedc3311b2448d38"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "0dc624a7a0d0172cb319bf44a82c6947"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.17.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1032192,
"byteOffset": 8257536
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 9289728
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 802816,
"byteOffset": 15712256
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 16515072
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4243456,
"byteOffset": 16522240
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8486912,
"byteOffset": 20765696
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 29252608
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 9216,
"byteOffset": 29259776
}
],
"md5sum": "ccb57b3d01e648bbc931a658838f9e7b"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "5d0d1e1f2133e58533dcf81402e4c41e"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "8c95fae0058863fee1f6912fe4abe704"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 30064640,
"records": [
{
"name": "model.layers.18.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1032192,
"byteOffset": 8257536
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 9289728
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 802816,
"byteOffset": 15712256
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 9216,
"byteOffset": 16515072
},
{
"name": "model.layers.19.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 16524288
},
{
"name": "model.layers.19.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1032192,
"byteOffset": 24781824
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 25814016
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4243456,
"byteOffset": 25821184
}
],
"md5sum": "95379d0574c73e15dd917fbdbc54c4c0"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "69120ad1870db92ce3e92361b9977951"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "956b7119c6995fc51ca53b046378912f"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8486912,
"byteOffset": 0
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 8486912
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 9216,
"byteOffset": 8494080
},
{
"name": "model.layers.2.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 8503296
},
{
"name": "model.layers.2.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1032192,
"byteOffset": 16760832
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 17793024
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 802816,
"byteOffset": 24215552
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 25018368
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4243456,
"byteOffset": 25025536
}
],
"md5sum": "13bdebba56cb9aa95e5e2c9a50c71e9e"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "97c546bc5b7beac1fb3375842e879d62"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "555c36b9ec75b3467fd8e9aa77d52f56"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8486912,
"byteOffset": 0
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 8486912
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 9216,
"byteOffset": 8494080
},
{
"name": "model.layers.3.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 8503296
},
{
"name": "model.layers.3.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1032192,
"byteOffset": 16760832
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 17793024
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 802816,
"byteOffset": 24215552
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 25018368
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4243456,
"byteOffset": 25025536
}
],
"md5sum": "e6a39eb3a717c47d2f18f593e7bb6761"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "5c6a305e0c427a01ec6635f7dcde81e5"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "83de4d465eabc78a2013e2bd1e529642"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8486912,
"byteOffset": 0
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 8486912
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 9216,
"byteOffset": 8494080
},
{
"name": "model.layers.4.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 8503296
},
{
"name": "model.layers.4.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1032192,
"byteOffset": 16760832
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 17793024
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 802816,
"byteOffset": 24215552
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 25018368
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4243456,
"byteOffset": 25025536
}
],
"md5sum": "924e88ef42600b938673703ff90fbd90"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "69f501185d7855e72344a64ae3b15b63"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "2529d21261f23702050aabf16647d5c1"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8486912,
"byteOffset": 0
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 8486912
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 9216,
"byteOffset": 8494080
},
{
"name": "model.layers.5.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 8503296
},
{
"name": "model.layers.5.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1032192,
"byteOffset": 16760832
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 17793024
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 802816,
"byteOffset": 24215552
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 25018368
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4243456,
"byteOffset": 25025536
}
],
"md5sum": "29fa63635d282cf79354353f020093b0"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "ac233d327f2c836f0ae9c393c13ba7b1"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "7a64e2458c5d41d61728f30553ca06f9"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8486912,
"byteOffset": 0
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 8486912
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 9216,
"byteOffset": 8494080
},
{
"name": "model.layers.6.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 8503296
},
{
"name": "model.layers.6.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1032192,
"byteOffset": 16760832
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 17793024
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 802816,
"byteOffset": 24215552
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 25018368
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4243456,
"byteOffset": 25025536
}
],
"md5sum": "59cb179392f6b0040b00ee3b874a5838"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "3a8854801d6f4a39dfc340b68ff6e125"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "f7a8cc4c7bc9f611b3c520bcc851e6a5"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8486912,
"byteOffset": 0
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 8486912
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 9216,
"byteOffset": 8494080
},
{
"name": "model.layers.7.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 8503296
},
{
"name": "model.layers.7.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1032192,
"byteOffset": 16760832
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 17793024
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 802816,
"byteOffset": 24215552
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 25018368
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4243456,
"byteOffset": 25025536
}
],
"md5sum": "a61e9283bf03c8178d3539e5553b3ca1"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "e4c24e343f51c6ae303c105db9e7eb30"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "6b433383533ab3a524aa020230a1e00f"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8486912,
"byteOffset": 0
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 8486912
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 9216,
"byteOffset": 8494080
},
{
"name": "model.layers.8.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 8503296
},
{
"name": "model.layers.8.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1032192,
"byteOffset": 16760832
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 17793024
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 802816,
"byteOffset": 24215552
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 25018368
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4243456,
"byteOffset": 25025536
}
],
"md5sum": "3df59a73580d70c779f393007375ac4b"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 25018368,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8486912,
"byteOffset": 0
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7168,
"byteOffset": 8486912
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 9216,
"byteOffset": 8494080
},
{
"name": "model.layers.9.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 8503296
},
{
"name": "model.layers.9.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1032192,
"byteOffset": 16760832
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 17793024
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 802816,
"byteOffset": 24215552
}
],
"md5sum": "d754d22d2eea8cb0fd62aef158fd0669"
}
]
}