numen-tech's picture
Add weights
d43d113
{
"metadata": {
"ParamSize": 356,
"ParamBytes": 24277639168.0,
"BitsPerParam": 3.4298442455617955
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 65536000,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32000,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536000,
"byteOffset": 0
}
],
"md5sum": "3b6f25b8d823cec4beddfb32efe4180b"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.30.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "9d825c375e412c0aea7b2c746d28de0a"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.30.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "df13bcd62bdfa32143e31ebdade22310"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.31.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "c495ea360e53f7bd0315294c28468b3e"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 24084480,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
32000,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048000,
"byteOffset": 0
},
{
"name": "model.layers.30.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 2048000
},
{
"name": "model.layers.30.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 16728064
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24068096
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24076288
}
],
"md5sum": "dcc924a0818be4335ceb64055dbccfd4"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.31.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "7cea697ec830d5cce084cd94ea0c8a8c"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 22102016,
"records": [
{
"name": "model.layers.31.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.31.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14680064
},
{
"name": "model.layers.31.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 22020096
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22085632
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22093824
}
],
"md5sum": "93c88283d48461bd5d13fd301c033c51"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 262144000,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
32000,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144000,
"byteOffset": 0
}
],
"md5sum": "36cdb38eca678471e4831205151d8905"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.0.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "efab6e352a39a20ddaa989d800a1b579"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 21635072,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
}
],
"md5sum": "1d17b0cc46db7f5bbd6934d40bdfc5f5"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.0.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "1f623d9db7907f71b965b136fc5fa5a9"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 22102016,
"records": [
{
"name": "model.layers.0.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.0.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14680064
},
{
"name": "model.layers.0.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 22020096
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22085632
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22093824
}
],
"md5sum": "8133a23122264f9cc511a71bcf259dcd"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.1.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "6caf630335dfcc06a8d63fc869a874d8"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 21626880,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
}
],
"md5sum": "c94c9445e531a10b00f636efe63eeb3a"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.1.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "47514d592b58d3b0291be3ac9e15dfba"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 22085632,
"records": [
{
"name": "model.layers.1.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.1.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14680064
},
{
"name": "model.layers.1.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 22020096
}
],
"md5sum": "eb3504f1c5e056acbe04db500a231910"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.2.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "5281420f7acad18c6cf92cc605751772"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 21643264,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21635072
}
],
"md5sum": "d464e589a555bc8c09421182912203a5"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.2.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "35d238b1d0d59d552a9896fb049ab156"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 22102016,
"records": [
{
"name": "model.layers.2.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.2.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14680064
},
{
"name": "model.layers.2.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 22020096
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22085632
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22093824
}
],
"md5sum": "b6d46c8864ccf3b79758bec4da4d0184"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.3.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "28e0431d31421918848a2eb4d707b48b"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 21626880,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
}
],
"md5sum": "738919daaa02205fbce94f9f6199e49c"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.3.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "307b1939c34308ea3450298b5cc3dd87"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 22085632,
"records": [
{
"name": "model.layers.3.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.3.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14680064
},
{
"name": "model.layers.3.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 22020096
}
],
"md5sum": "80466c47aeaa45099b033730c17c34a8"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.10.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "98fa9cd94b2f7d47baa7f65b130aae50"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 21626880,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
}
],
"md5sum": "8ed191bba9ae37ad3e66731d78c77122"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.10.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "d96bb408302083522a5992acf512a3b5"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 22085632,
"records": [
{
"name": "model.layers.10.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.10.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14680064
},
{
"name": "model.layers.10.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 22020096
}
],
"md5sum": "f09453e9053a5bce9ece236ab2ce29f4"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.8.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "2e8670b3993cd01fa98e9a57e6c15716"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.8.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "dcaf11c1b81b90f707e0c480598b77f6"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 28966912,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.8.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 21626880
}
],
"md5sum": "00f75042249080f0081c2aca36a9b362"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.9.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "2b001cb6130fd80d9df1a5ae7ad0d9d8"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.9.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "019f3ea96f31183f7220b5827cc59f4f"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 29376512,
"records": [
{
"name": "model.layers.8.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 14680064
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 14688256
},
{
"name": "model.layers.9.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 14696448
}
],
"md5sum": "8cff9d7cd8c7d5cfb74bbdc1d923932c"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.11.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "10eedc1ccd7da709745584444fc7a9b3"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 29065216,
"records": [
{
"name": "model.layers.9.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.9.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 7340032
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7405568
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7413760
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7421952
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 20004864
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 20398080
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 28786688
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29048832
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29057024
}
],
"md5sum": "a17c56b94e7a075f6a80b39475a9775d"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.11.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "e10c9b57828ac90c07c7533a9e2e9380"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 22085632,
"records": [
{
"name": "model.layers.11.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.11.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14680064
},
{
"name": "model.layers.11.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 22020096
}
],
"md5sum": "7950bfec7739d15dd58207a6dd0f9867"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.12.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "678912f9573f2e365c8b60cf9139e5d0"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 21643264,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21635072
}
],
"md5sum": "60bf1b2e6246fbfba4cd98c88536e7e5"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.12.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "9215dba33848e89a6bfdaaa1fc40e867"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 22102016,
"records": [
{
"name": "model.layers.12.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.12.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14680064
},
{
"name": "model.layers.12.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 22020096
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22085632
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22093824
}
],
"md5sum": "ceda99e6d74c1545dbef5f45ba7473d9"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.13.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "1c1e9202f3d7b2e2b64b28ebde55d09b"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 21626880,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
}
],
"md5sum": "d4b0827ff60ff6fb3905d15d221ca3f4"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.13.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "a76566e15edd4c1d3e463631f7edaec3"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 22085632,
"records": [
{
"name": "model.layers.13.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.13.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14680064
},
{
"name": "model.layers.13.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 22020096
}
],
"md5sum": "f14efa4d35f290ea7cc7be4695898ea9"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.14.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "be0a84bb50c58231678f876150ef4068"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 21643264,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21635072
}
],
"md5sum": "d6a4d56fc79d05a51f9e8d172819b6fb"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.14.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "54efcdb5a51aa2757ddf4024d3c906b1"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 22102016,
"records": [
{
"name": "model.layers.14.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.14.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14680064
},
{
"name": "model.layers.14.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 22020096
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22085632
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22093824
}
],
"md5sum": "5b7ff8905d9c98df28ddf12cea2dec2f"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.15.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "c131ddb076080b845620f4273aae01a1"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 21626880,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
}
],
"md5sum": "0cae2107c3345736f8bcc8f09ad57bb9"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.15.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "705b741a48ec40dc2fd5d951c368ce02"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 22085632,
"records": [
{
"name": "model.layers.15.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.15.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14680064
},
{
"name": "model.layers.15.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 22020096
}
],
"md5sum": "46df7e9632337685b1bddadf03a0dfdc"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.16.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "aff415451272f739b849f7cfa8963d91"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 21643264,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21635072
}
],
"md5sum": "2c745b4961383d131f8d4c6cc909bcd1"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.16.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "ef6ce6dbd35a4125a90d12d95236c961"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 22085632,
"records": [
{
"name": "model.layers.16.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.16.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14680064
},
{
"name": "model.layers.16.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 22020096
}
],
"md5sum": "9553f29b6e6b84911e8c4d6d8f6c1585"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.17.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "28d7243950a6b098f8646940aeed552b"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 21643264,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21635072
}
],
"md5sum": "4f4a2955ae8d0c9ea7a80f8f7aa95154"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.17.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "931fb81039a0fb2638ca822015b580fe"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 22102016,
"records": [
{
"name": "model.layers.17.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.17.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14680064
},
{
"name": "model.layers.17.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 22020096
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22085632
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22093824
}
],
"md5sum": "e491db355a5239eaa230f3426bce8214"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.18.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "ec73a7543545bed19ca448b157387168"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 21626880,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
}
],
"md5sum": "beabef8bdda8a822bb7107ac83fb13fa"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.18.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "8a2c00bb9dfa2282cec85fd2615a0331"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 22085632,
"records": [
{
"name": "model.layers.18.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.18.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14680064
},
{
"name": "model.layers.18.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 22020096
}
],
"md5sum": "8c9b95007ebdd5458dd5721fca1092d3"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.19.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "783334e283b867b4b3c6c8c433ddb7ee"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 21643264,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21635072
}
],
"md5sum": "c8586ecd44b4304f223bc62c53d7a0ef"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.19.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "59938356b53664f73423a0833a34f268"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 22102016,
"records": [
{
"name": "model.layers.19.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.19.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14680064
},
{
"name": "model.layers.19.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 22020096
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22085632
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22093824
}
],
"md5sum": "c28adc0b3276f183cbaa5b99e07c87ef"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.20.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "52a2379015dee146993bab89e0465713"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 21626880,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
}
],
"md5sum": "1ae6fa03364fe2d2ed238a00e1b5f6a3"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.20.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "d4f9887c5088d87c1308b3e5477889ed"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 22085632,
"records": [
{
"name": "model.layers.20.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.20.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14680064
},
{
"name": "model.layers.20.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 22020096
}
],
"md5sum": "922ee3a62b434e509e4b9287ad68c784"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.21.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "a8eee9e487d3394853a725d60bf57461"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 21643264,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21635072
}
],
"md5sum": "ce3be756a7abeda39cffada345d01e60"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.21.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "d1cf3d1c6157d483cbe3ad433ae6078c"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 22102016,
"records": [
{
"name": "model.layers.21.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.21.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14680064
},
{
"name": "model.layers.21.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 22020096
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22085632
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22093824
}
],
"md5sum": "fad7df206c81ef93f4eefe4a8ed23ade"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.22.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "e7d8ce1c8ee2caed046f4f347cc83dc2"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 21626880,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
}
],
"md5sum": "54e776485bb5128956145f6ee0e72588"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.22.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "8cd2be6ff2aa154f12b55a12ff41202f"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 22085632,
"records": [
{
"name": "model.layers.22.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.22.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14680064
},
{
"name": "model.layers.22.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 22020096
}
],
"md5sum": "8d8a07d7d97798b955d2d8d8e967ca94"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.23.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "e417cae68dd79e734bb0cb0961807157"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 21643264,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21635072
}
],
"md5sum": "06949d6c2f0d8eada0226a4c32156e8e"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.23.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "fa77c5bb0268adfc658f756d8b57e344"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 22085632,
"records": [
{
"name": "model.layers.23.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.23.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14680064
},
{
"name": "model.layers.23.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 22020096
}
],
"md5sum": "912ba90dda7e2fd4aa9410f938978b8d"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.24.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "a7487b40af49d8ab2c3a0495a42ce392"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 21643264,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21635072
}
],
"md5sum": "fdd04151f9dce083a676bcd9cc73648c"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.24.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "2452bdb5384efbe3894c42f2de87f386"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 22102016,
"records": [
{
"name": "model.layers.24.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.24.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14680064
},
{
"name": "model.layers.24.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 22020096
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22085632
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22093824
}
],
"md5sum": "e0592f7f8933b1dcf188658436178893"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.25.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "28c669b9b4f15eaa296f897aed5dffdc"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 21626880,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
}
],
"md5sum": "7ddba3e470fe4d8e0dbf7f12e169018f"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.25.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "18da56b2cdf2c3d3f1c02053044fae4b"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 22085632,
"records": [
{
"name": "model.layers.25.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.25.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14680064
},
{
"name": "model.layers.25.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 22020096
}
],
"md5sum": "be604e69a48be6fe5853d9291e3fe786"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.26.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "8ca510951e5c363b16fde7b290c4e95c"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 21643264,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21635072
}
],
"md5sum": "dfcdfe4edbd8789f65e9437b016c98fd"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.26.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "d1d8e89f9050e1371a082bab41bba357"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 22102016,
"records": [
{
"name": "model.layers.26.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.26.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14680064
},
{
"name": "model.layers.26.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 22020096
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22085632
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22093824
}
],
"md5sum": "6fd70b4c4c5354de6157ca8fe5d9071f"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.27.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "7155ceec71cf1ae48238261b7a999719"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 21626880,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
}
],
"md5sum": "38aaa2b5547f7ee7f309557ab56beffd"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.27.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "b57fc3acc2ba55b9455975693d32eea8"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 22085632,
"records": [
{
"name": "model.layers.27.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.27.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14680064
},
{
"name": "model.layers.27.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 22020096
}
],
"md5sum": "75dc2577150a1d74d090c659a6268006"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.28.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "dd8a89544c77dbc68dbc77dbf1f80ea0"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 21643264,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21635072
}
],
"md5sum": "0818dd6b081c935cce30879e6d175276"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.28.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "5db38a89f6f790f1e40c35964d1fabbb"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 22085632,
"records": [
{
"name": "model.layers.28.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.28.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14680064
},
{
"name": "model.layers.28.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 22020096
}
],
"md5sum": "bff2ad72309f161570ff9a6d204ecc7f"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.29.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "828b020749074f504b8bf10ee50be5dc"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 21643264,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21635072
}
],
"md5sum": "fb996f7a7431888333867f6ba23383f1"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.29.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "2437691e2a922f980ca39d2b11f84fb0"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 22102016,
"records": [
{
"name": "model.layers.29.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.29.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14680064
},
{
"name": "model.layers.29.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 22020096
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22085632
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22093824
}
],
"md5sum": "e0f3246b5677013ba96770391141643d"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 21692416,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.30.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21626880
}
],
"md5sum": "b44176ba80edc4cafe4f91dd0bd7791a"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.4.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "3b82eefff79f3995475cc68e21cabec7"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 21643264,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21635072
}
],
"md5sum": "3dd14f4ee4c620308f6976e07d917141"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.4.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "ee40c95ddea0e3fd424e4c0bd9d82271"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 22102016,
"records": [
{
"name": "model.layers.4.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.4.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14680064
},
{
"name": "model.layers.4.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 22020096
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22085632
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22093824
}
],
"md5sum": "da352f924794e6795c387782ac4d9916"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 21692416,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.5.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21626880
}
],
"md5sum": "bd4c7b87d2157a1e57519d4e3db75426"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.5.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "aad2593a27e3a2a56e5adc1f78528fcd"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 21626880,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
}
],
"md5sum": "a65823777b5033f162aa70a77f0486b9"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.5.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "bfef4c9e8a80780f8fa70b77e6c31183"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.6.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "5ea881086630d595870b2cd7346c346a"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.5.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.5.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14680064
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22020096
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "3d32e5dbdad4f7357a79bbe68ca0ef93"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.6.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "2490bcd62d1d31a21aed752aa92d4109"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 22085632,
"records": [
{
"name": "model.layers.6.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.6.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14680064
},
{
"name": "model.layers.6.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 22020096
}
],
"md5sum": "ea7d265439328f86abe6b78cc2c35149"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.7.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "ddd7e062e8cb31eab3782e35dc37ff1a"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 21643264,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21635072
}
],
"md5sum": "13795ac082e97154d7ba6502c71ffe18"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.7.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "75154ec6119edd8b944bb2fbf4424d0b"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 22102016,
"records": [
{
"name": "model.layers.7.moe.e1_e3.q_scale",
"shape": [
8,
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.7.moe.e2.q_scale",
"shape": [
8,
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14680064
},
{
"name": "model.layers.7.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 22020096
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22085632
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22093824
}
],
"md5sum": "038ef9f4ef217953cb848459d7918bd5"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 21692416,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.8.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21626880
}
],
"md5sum": "f4b87baa9b35a746997394c8bbbdae22"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 21626880,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
}
],
"md5sum": "31b7ada7d9db61d4d3a6654bc539aadb"
}
]
}