JackBinary's picture
Add files using upload-large-folder tool
bbd6a75 verified
{
"metadata": {
"ParamSize": 313,
"ParamBytes": 3439746048.0,
"BitsPerParam": 3.6133605651807272
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 218972160,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
152064,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 218972160,
"byteOffset": 0
}
],
"md5sum": "b4fc39160c5d13cf63beb04be6da6974"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "2d561d8b5153af77f4b177385621b960"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "a3b467ba19b3b237ab15f7d36d617394"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 30776320,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
152064,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27371520,
"byteOffset": 0
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 27371520
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 27378688
}
],
"md5sum": "d5afab1c0f90905466d9d7551560db72"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "33ae4379385644dfae9eb6c1277ff4cf"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "dc2ac2d2fca2215d90f71654a21b3f29"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 30339072,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 0
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6819840
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 6827008
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 11987968
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 12633088
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 12640256
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 16037888
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 22857728
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 22864896
},
{
"name": "model.layers.20.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 22874112
},
{
"name": "model.layers.20.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 29509632
}
],
"md5sum": "51fb81e215bfb6d41d4800c35d06b55a"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 32994304,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 5160960
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 5806080
},
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 5813248
}
],
"md5sum": "65681f9d8a4d803fc283abbba4bdf10f"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "4eacd52be0910b9068c40072ee98d18e"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "c8bbfe069e662e3ecf23d13219e9cdf8"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "3a1c0e93b64e4bc06ccf62d77dc084f7"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 26909696,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 0
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 3397632
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 10217472
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 10224640
},
{
"name": "model.layers.21.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 10233856
},
{
"name": "model.layers.21.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 16869376
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 17698816
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 22859776
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 23504896
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 23512064
}
],
"md5sum": "16fe516adb4c90f3598111912d1fe552"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "97ebbb2accbc8cfb1e3aa3cec00acf07"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "5dadc5b949b864bd29f52d9ce6163b7f"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 30348288,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 0
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6819840
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 6827008
},
{
"name": "model.layers.22.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 6836224
},
{
"name": "model.layers.22.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 13471744
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 14301184
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 19462144
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 20107264
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 20114432
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 23512064
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 30331904
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 30339072
}
],
"md5sum": "8cdaf158e25684e3259f3a7d11885cd6"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "b8cb47f18acecf91f19d4686a57b734d"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "10fcc9cf4bfb9fe739f94bf70ddd8e1a"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 30977024,
"records": [
{
"name": "model.layers.23.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 6635520
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 7464960
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 12625920
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 13271040
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 13278208
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 16675840
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 23495680
},
{
"name": "model.layers.24.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23502848
},
{
"name": "model.layers.24.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 23512064
},
{
"name": "model.layers.24.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 30147584
}
],
"md5sum": "2aecbeda2ca5b9a7568fd2efbf185372"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 32994304,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 5160960
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 5806080
},
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 5813248
}
],
"md5sum": "615c85f8928381187355934b0d2f9279"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "ce51019d2157833ef0c49ae5fe84f015"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "65f4576995c60fafdbd50cfdb03d933b"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "33cd5958082609eb3641970450a0b51c"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 26909696,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 0
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 3397632
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 10217472
},
{
"name": "model.layers.25.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 10224640
},
{
"name": "model.layers.25.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 10233856
},
{
"name": "model.layers.25.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 16869376
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 17698816
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 22859776
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 23504896
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 23512064
}
],
"md5sum": "c8a206944aedb223a5cfd35b10e5ef28"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "1b13d05efd8f61428213a346587849c8"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "69a91cb98369a2f2f11ba241f3d4a8c7"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 30348288,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 0
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6819840
},
{
"name": "model.layers.26.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 6827008
},
{
"name": "model.layers.26.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 6836224
},
{
"name": "model.layers.26.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 13471744
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 14301184
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 19462144
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 20107264
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 20114432
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 23512064
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 30331904
},
{
"name": "model.layers.27.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 30339072
}
],
"md5sum": "be683f8ee58ab0897ff72f5bcad1cabf"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 218972160,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
152064,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 218972160,
"byteOffset": 0
}
],
"md5sum": "6eed88c33f805d5bfab2cda7a468fccd"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 27371520,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
152064,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27371520,
"byteOffset": 0
}
],
"md5sum": "4f4368e3531e08689eef3e74d4e44dcf"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "239c75055c4fb495658a4ac3d5a07376"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "68afb29c0aaf687cdfa0573457670c13"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 30984192,
"records": [
{
"name": "model.layers.27.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 6635520
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 7464960
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 12625920
},
{
"name": "model.norm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 13271040
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 13278208
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 13285376
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 16683008
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 23502848
},
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23510016
},
{
"name": "model.layers.0.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 23519232
},
{
"name": "model.layers.0.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 30154752
}
],
"md5sum": "41bfee786a991f6b8f09daeb5412c9d0"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 32994304,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 5160960
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 5806080
},
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 5813248
}
],
"md5sum": "b393618ce5f723676c06e84a03900e18"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "042fdc2cefa7587db1a3225ded6dc7d2"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "907475299861cec1e845a1975825a400"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "67ead573838c08de78f9bf64c4f12344"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 26909696,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 0
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 3397632
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 10217472
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 10224640
},
{
"name": "model.layers.1.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 10233856
},
{
"name": "model.layers.1.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 16869376
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 17698816
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 22859776
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 23504896
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 23512064
}
],
"md5sum": "e3a8c8015714c2d2da82091357b7717b"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "939be3b3c97611a00376fb8214d26751"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "ee6777e13a7815d925ef649f4759edfa"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 30348288,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 0
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6819840
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 6827008
},
{
"name": "model.layers.10.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 6836224
},
{
"name": "model.layers.10.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 13471744
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 14301184
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 19462144
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 20107264
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 20114432
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 23512064
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 30331904
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 30339072
}
],
"md5sum": "39c0c9ce88151a51153565fd1e20fd00"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "888a47fd5ed83111e3fff42e1228bd58"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "031a1b06f9148f66a84a1fdd9bccfdf3"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 30977024,
"records": [
{
"name": "model.layers.11.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 6635520
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 7464960
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 12625920
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 13271040
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 13278208
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 16675840
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 23495680
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23502848
},
{
"name": "model.layers.12.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 23512064
},
{
"name": "model.layers.12.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 30147584
}
],
"md5sum": "ed8eea457399a9795178d4c06435ad96"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 32994304,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 5160960
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 5806080
},
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 5813248
}
],
"md5sum": "5727feb7dce7d8c825ebd62c3fef0a73"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "3650e7b90d9f058921baac2356fd30c0"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "32086e3e6b7d1ad88991b0bac36249c3"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "ad54adde49211ebbf90aae6eb860644d"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 26909696,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 0
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 3397632
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 10217472
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 10224640
},
{
"name": "model.layers.13.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 10233856
},
{
"name": "model.layers.13.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 16869376
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 17698816
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 22859776
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 23504896
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 23512064
}
],
"md5sum": "cdd0c0962aba4e58bb8132868efedc64"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "e4498d56c4d053483957eee5f939911c"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "848af1e43e4813ca4f6788a98464db03"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 30348288,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 0
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6819840
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 6827008
},
{
"name": "model.layers.14.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 6836224
},
{
"name": "model.layers.14.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 13471744
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 14301184
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 19462144
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 20107264
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 20114432
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 23512064
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 30331904
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 30339072
}
],
"md5sum": "920c863af668e2b339a2d9fd1feb0b1c"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "4fcb8a1363c917a338591d6eb5960c31"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "c818acd9849724ee3a5444015b2b6a3f"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 30977024,
"records": [
{
"name": "model.layers.15.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 6635520
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 7464960
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 12625920
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 13271040
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 13278208
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 16675840
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 23495680
},
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23502848
},
{
"name": "model.layers.16.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 23512064
},
{
"name": "model.layers.16.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 30147584
}
],
"md5sum": "45e7be01822be63c11795abd05221ed2"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 32994304,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 5160960
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 5806080
},
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 5813248
}
],
"md5sum": "602f216e1760de5a7eca801cc5eab687"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "7b09065354e9b243b197460e9bfddcd4"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "3c9e065a3e9cd4bb1b24ff82906246b3"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "208922f688d0dd64819ad54b7dcfdf01"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 26909696,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 0
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 3397632
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 10217472
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 10224640
},
{
"name": "model.layers.17.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 10233856
},
{
"name": "model.layers.17.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 16869376
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 17698816
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 22859776
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 23504896
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 23512064
}
],
"md5sum": "3b2725f9be3bdfade2d53d104b7cdd84"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "781dde45a2054689005d6d6892d75676"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "fa77c72865126460156eb14b020798b7"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 30986240,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 0
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6819840
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 6827008
},
{
"name": "model.layers.18.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 6836224
},
{
"name": "model.layers.18.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 13471744
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 14301184
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 19462144
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 20107264
},
{
"name": "model.layers.19.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 20116480
},
{
"name": "model.layers.19.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 26752000
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 27581440
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 27588608
}
],
"md5sum": "9d4b24f756e1475fd17b7a89f1cf4ab5"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "4fd332b49d620e42dc04f7315e2d260e"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "7c287b9a13e4083a3d2f676880be1c75"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 30348288,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 0
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6819840
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 6827008
},
{
"name": "model.layers.2.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 6836224
},
{
"name": "model.layers.2.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 13471744
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 14301184
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 19462144
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 20107264
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 20114432
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 23512064
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 30331904
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 30339072
}
],
"md5sum": "a41dfe804aa359fcf5bc831b0812a8a2"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "88d98853742945b160c70bb1646dafb8"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "ee226420f0671dc1b0d7349d69f43d40"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 30977024,
"records": [
{
"name": "model.layers.3.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 6635520
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 7464960
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 12625920
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 13271040
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 13278208
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 16675840
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 23495680
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23502848
},
{
"name": "model.layers.4.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 23512064
},
{
"name": "model.layers.4.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 30147584
}
],
"md5sum": "d30aa67e9745914b9a027662125873e0"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 32994304,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 5160960
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 5806080
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 5813248
}
],
"md5sum": "bfcfd1893e04b52d348b3ca8683be209"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "e547b8d8d8425d2c3d08fb1caa6947cd"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "40f7480fd1dbe5eee62014b85fbbcb2c"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "aa7372e642e12dbb5aecf98e421e2b16"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 26909696,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 0
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 3397632
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 10217472
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 10224640
},
{
"name": "model.layers.5.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 10233856
},
{
"name": "model.layers.5.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 16869376
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 17698816
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 22859776
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 23504896
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 23512064
}
],
"md5sum": "e13e6bbf32c2c41dcd351883a95747d8"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "86d6137dc9e935b8beea2f515c5d921d"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "c3b16dee726e2be74ead249fecce9ccd"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 30348288,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 0
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6819840
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 6827008
},
{
"name": "model.layers.6.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 6836224
},
{
"name": "model.layers.6.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 13471744
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 14301184
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 19462144
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 20107264
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 20114432
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 23512064
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 30331904
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 30339072
}
],
"md5sum": "9f24b9451c58b830230514814b91c59a"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "68750e70312a5c7ec9f2964a6f826da8"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "c0e4f0965d619ebab3269ec24c3fc820"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 30977024,
"records": [
{
"name": "model.layers.7.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 6635520
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 7464960
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 12625920
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 13271040
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 13278208
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 16675840
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 23495680
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23502848
},
{
"name": "model.layers.8.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 23512064
},
{
"name": "model.layers.8.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 30147584
}
],
"md5sum": "39fea896add6b9d50fe46251438b1ebc"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 32994304,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 5160960
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 5806080
},
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 5813248
}
],
"md5sum": "f2b0c6d89b3f015dc58d6b18b66c29b9"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "84b1d749ef66443542f5f619de402ab3"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 23504896,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 0
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 3397632
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 10217472
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 10224640
},
{
"name": "model.layers.9.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 10233856
},
{
"name": "model.layers.9.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 16869376
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 17698816
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 22859776
}
],
"md5sum": "0926df2f75e607591fe1c668a8942536"
}
]
}