junrushao's picture
Initial commit
6b98f93
{
"metadata": {
"ParamSize": 405,
"ParamBytes": 5859265536.0,
"BitsPerParam": 3.6013014104119914
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 65540096,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32002,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65540096,
"byteOffset": 0
}
],
"md5sum": "405ed1383e19618ca9de16b5a07c950c"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "a58fa64488aa733bfa0c0b6dff61c52f"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "605784270aff6407173936ba328e0d45"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "dd06c00511a38ac0c041974a8903d4bf"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "fbf59071e62c576c355352f68c3deb9c"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 26329600,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
32002,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192512,
"byteOffset": 0
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8192512
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 8202752
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11745792
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11756032
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11766272
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15309312
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22387200
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22397440
}
],
"md5sum": "acf8a9f03bf650d94783ee1dcdeef8cc"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "d5383e04e8e32bcdddab65e86a6d755e"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "a48c63509b3ddac9ee549eeeb08f2902"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.32.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "0aa84c2377fdb5977777b80ab0d1fa3d"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.32.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.32.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "2ad643291a166560b17471cf6f7a85ef"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "38fe08cf63f1947c46c1011229502dfc"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "51dd41b92a0659f813a9b5b433f3c65e"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.33.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "7cbe3bc78ad3e9e580e5b65271f495c2"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.32.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.32.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.33.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.33.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "49271c1c80d0ca6468078bc1403b849c"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "cffa00768cecc6d2494913449a8ecac3"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "e5aa5dd5633df6c3536c9a03edcb929b"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.34.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "86da7c6ce1e9faba69fe73e6def30838"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.33.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.33.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.34.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.34.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "ce3c18e893f8fba36010f318392c27b1"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "df9dd81037f248b7707046c293b3d2bc"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "8ee31dfcd211017fce2208f2c8803e1f"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.35.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "7ef2d6a5460d42594c78c0dda94d5083"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.34.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.34.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.35.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.35.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "45ce294c6d526904e7721976049da1fa"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "1f136d0884079fc0f82f6b52436a569d"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "2513956537056e22d19cd63f1e556f11"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.36.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "decf7db49bd46d6c73512c7d4e6dbfcd"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.35.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.35.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.36.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.36.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.36.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.36.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "6848f3119ddd895d4f9bd7e78dbc4ce1"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "84bc204e9cbbd2866ee47b7743ed016b"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "a1cfde4ebefc7ebb0cd424e88de6de69"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.37.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "b0a760907a8d4cf8f907caa2251d61e0"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.36.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.36.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.37.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.37.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.37.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.37.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "700cac2c8bff3aa823a64367125edb2f"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.38.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "3cd234034524d81f942ff0e9b9e17d3e"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "189d32c8f4d8fcdc2fbda71f867521e3"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.38.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "d12eb2e1d8537dfa3698c2260b030eda"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.37.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.37.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.38.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.38.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.38.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.38.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "091f814640fc8a97fd61b4bc68d6362c"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.39.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "f28d3a261a967b24b451c10bc5a4aa31"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "6ff0a26be500e18b0756e9c9fd456dd7"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.39.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "4effb57441d62d554c25c7788300a80d"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.38.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.38.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.39.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.39.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.39.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.39.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "e9a1d00af6e2211f42d0f2c6e932b9cd"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 65540096,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32002,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65540096,
"byteOffset": 0
}
],
"md5sum": "22accb9b26f672d87c2f723c1d6f7f20"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "3de776a20e7e771a00ba6ce914bcab4c"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "a8d62d234dfaa7a1d69e8d38d9a4cf29"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "112dc1dbb9515757312bcd554538e8dd"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 30640640,
"records": [
{
"name": "model.layers.39.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.39.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.norm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
32002,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192512,
"byteOffset": 11806720
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 19999232
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 20009472
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 23552512
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30630400
}
],
"md5sum": "80ecb5c076651a55d6c9ec0670752b55"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "f008ebb65a98156a08250227f00003e1"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "29cdc3032a48fb58a46c609c9fb8c9d3"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "e7cf6184c101e3df8b6e593f2b507a0a"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 30302208,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 3932160
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 14417920
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 15728640
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 15738880
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 19281920
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 26359808
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 26370048
}
],
"md5sum": "9e6ff77845fd2b6d2af607e61c47e348"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "f7e685cb2b3400026f99d1eef65356c5"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "154d749416e6970411b6b77fe912a243"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "083519a30e0e203ba71a810b09071a6d"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "ce4db900ce509cafa51bc10607052d47"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "d106267f5e66a6003dd0632a183d9ddf"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "7dfdc2e126d4e36bba75b7da95d7d657"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "45e2432ec62115a5e3fce05c5a8b2cd6"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "7693fe69568b9c6df1f9b89177442c7b"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "b3033360a34d6088b414fbc713a7e10a"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "f281d04a1063d8643f940ad73ee0e1af"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "57f2028b43f7e959738cf4df001a9f42"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "22fc37ed88b123bbb66284efea170661"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "79a1435ec27887aee47aa41f392293a7"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "ef5934bbc7aa88e5f6206863d1abf6e4"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "03e80afa88ff14c5f2feb3bec6f2ac51"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "f17521658c466207c9d5315ff19f7507"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "7441c3e445fd2c2b10868f12ed962c6a"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "3558c73f7c67e495309f80f48a56ab04"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "16b5c1c705fa1f8c3812ca1e6e5b5e67"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "64a627d97a4fde812187e67c641c55d2"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "b9322382fbc54b0bfc9b676a2a995737"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "6223f445868a6d1cd08ffbc3fe9d8fd6"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "11e473bac8600407d7e093f00782e78e"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "2c6cd739ceac2a95454d3d0c5cfd0542"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 30302208,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 11796480
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 15728640
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 15738880
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 19281920
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 26359808
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 26370048
}
],
"md5sum": "a3c97c34504e1f05dffb0cf2c6b86a46"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "80d972df01961c5511c548cd3c99f830"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "55e06952bc351fc2e525319f2bedef6f"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "dbb0510f881b75c4ccc6a0dfbbb3124c"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "a527a512723bbbbd2f1cd7859bcf5d36"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "9eb4809bd54ea26a8b5126fcb117a70a"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "cba9c5fde3280bb564b45f5992f5b4e8"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "8a0411952df3088bb0afe38194bdeee3"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "6def8f3a5a7e3f74488aa106efb8fa34"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "b3b8c5634e25cb980236e2a4c1db56ca"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "cafaad0ddc27f632ef699c1d79cc0b06"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "508e5bb418a84c5ac5b70235cf413d87"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "63a8a55a4c2202fbed96d7b206faaf89"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "0c8a6bc6c1d67c613481826224d5a555"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "207003909175bbc8c716bced14aab119"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "f248ff085bc95810a1239a65367cb1ce"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "91875e6f6defa8aba88fd87ba10db520"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "522b9afa6928983564067ba5dd523764"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "95d1bf5856cc8da9232b25a5b1d00bdc"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "84e9a243252469fdd92a2d9fa0063116"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "09bec1b4895ab2131781901ce3ff8b12"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "2c988fe815af325db8db4ecfc0754f1c"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "696a534c02144c87ebcfd957c59a9fb8"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "65911230245ce194c7d2f3d2ab7ea0b2"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "ebf9a9f9bb8bb8f28aa2efe58e93be89"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "47bc71a2c231389b803863ce280b1fa8"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "305eccf926407f367f5e8c2f5c66cc60"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "9aa9df54db29fa362a6c4243ac638ea2"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "5cc276fc555fcb85b16ec1bd33f2e4eb"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "310de39c394dd583b7b02facc917b399"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "b2b35167b76d0e430119305436e8bbc9"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 32923648,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 22437888
}
],
"md5sum": "6a0de316a78d80a8be9f8beecb351e82"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "48da9eeb369fee83396cab84480cd841"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 33208320,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 0
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1310720
},
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 1320960
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 29665280
}
],
"md5sum": "16d6edfcf84fbb533faeeeb1ba92d6f7"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "34be2b5dcb73e890b3ba1c9d127229d7"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "30888d852d86761771fc0c546ce6d3e9"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "3b04e7d5ae22efa395571d4479f3b027"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "219b005b06673061e8d3786868744e13"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 33458176,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 0
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 7077888
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 7088128
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 11020288
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 21506048
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22816768
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 22827008
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 26370048
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 33447936
}
],
"md5sum": "8ef8dfe78958bd65cccba37070c42f31"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "2d454bdeff08158481fe07be7f4ac4bf"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "ffc9c7bd9cae526fd582aad2a36b23eb"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "5568f3b982713524bbd358e7aab3eb21"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 30302208,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 3932160
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 14417920
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 15728640
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 15738880
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 19281920
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 26359808
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 26370048
}
],
"md5sum": "4a0fad45f1ba4433f234388da280b142"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "061fe863d0ef8316db73196b7c2307c1"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "1ee009765f2c84a91a9cd90292246dbf"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "efe78d994b1d3b8e545f5ead45f448c1"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "73256329d2a94277eb2e9c5022ceb016"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "3730dfcd04bd604da7c23eacbd0daa9f"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "2412e5b4ff494cddd14cc6823aac60ca"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "a4b8b2d94d99a53ecdb789e23597664d"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "e6e10f6d03703f35564ad43a6158dfbf"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "48c4b8aceb7db9a75311ecd761efa704"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "e3c36754984f1d2318ecb6bd4b78457a"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "ec8f50a6f275f5c45a7c3de9aa92e287"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "6776c4b3f8bce670eb91adb1498d100f"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "a36da2763a9a56450e25227b98c9eb02"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "fbfa208e20addea9c97bcc57113a29ed"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "628e3a91f5849f2b93ca5b7baafeb71d"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "77befcd28d8be9daf98c8409fc92a4b2"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "7c1d001b944daab166ab929b9c89127e"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "19edc9d3c56012c46cf888017f75c6be"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "dfd7d399073d2c49956b3f754cb45d55"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "28a51c6fdea48364cf17c42190c713c8"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "5359ac242b78e81ec42e1dccc8364af3"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "066ce33ec7712127e83c8cc08abe22e6"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "584500e8510ce5a4029133fcce8ed606"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "51430f1a502f17580f864420ac50cdac"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "66685beb1ac1aa0d43b929b244eff09e"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "070ccf5cafde328b2144c57120fef036"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "4de978984e5ed6cdbbd289bf307c6f91"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "cbd891bc2a7f9f7de05b4f7e58c48688"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "178572c3991bc1b598117565bc2e346e"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "61c7fc0f94f28c3cea0a10000ed50d1b"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "9cddd7c09e8c5406ffc3799baf045494"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "35465cb1085f65cce7df473b0bb936b5"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "778ca0b901c323afe31ec10f367c89d9"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "168fbdebbb234b034826597d46c4003d"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "cedfcac91b88fd728960ed57f07aef1a"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "e758922164119458bc1c1efed670789b"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "6a7743d5dff83cd45c6cf7691ce25bce"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "10fe461e3e252ab5610c718242c04716"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "a6728c64b1194a757c4f3d8e151abdff"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "b466e0b2b38548b778bd38dd6c80849f"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
5120,
1384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28344320,
"byteOffset": 0
}
],
"md5sum": "acff86b07643c47947f90d1dcae7daac"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "119e8c4673f754dca19be72915de5567"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "cb631e97dc5826011e16ff1e7292f099"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 26370048,
"records": [
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
5120,
346
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3543040,
"byteOffset": 11806720
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 15349760
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22427648
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 22437888
}
],
"md5sum": "367185e2389e54b2e4e8b16643332eef"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
27648,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "f355c67c918b2767a5ec9389e34e75ef"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
15360,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "e378260e39e1d7f44a4eab42f6fb2f45"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 33292288,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
27648,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7077888,
"byteOffset": 11796480
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
15360,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3932160,
"byteOffset": 18874368
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 22806528
}
],
"md5sum": "5bbfccf19889e3dc41d2712c33f209ff"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 1310720,
"records": [
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 0
}
],
"md5sum": "b883ba2d68de141a4e53e7c2e6a9f674"
}
]
}