|
{ |
|
"metadata": { |
|
"ParamSize": 356, |
|
"ParamBytes": 24277639168.0, |
|
"BitsPerParam": 3.4298442455617955 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 65536000, |
|
"records": [ |
|
{ |
|
"name": "lm_head.q_weight", |
|
"shape": [ |
|
32000, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536000, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3b6f25b8d823cec4beddfb32efe4180b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9d825c375e412c0aea7b2c746d28de0a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "df13bcd62bdfa32143e31ebdade22310" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c495ea360e53f7bd0315294c28468b3e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24084480, |
|
"records": [ |
|
{ |
|
"name": "lm_head.q_scale", |
|
"shape": [ |
|
32000, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048000, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.30.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 2048000 |
|
}, |
|
{ |
|
"name": "model.layers.30.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 16728064 |
|
}, |
|
{ |
|
"name": "model.layers.30.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 24068096 |
|
}, |
|
{ |
|
"name": "model.layers.30.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 24076288 |
|
} |
|
], |
|
"md5sum": "dcc924a0818be4335ceb64055dbccfd4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7cea697ec830d5cce084cd94ea0c8a8c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22102016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.31.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14680064 |
|
}, |
|
{ |
|
"name": "model.layers.31.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.31.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 22085632 |
|
}, |
|
{ |
|
"name": "model.layers.31.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 22093824 |
|
} |
|
], |
|
"md5sum": "93c88283d48461bd5d13fd301c033c51" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 262144000, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.weight", |
|
"shape": [ |
|
32000, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144000, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "36cdb38eca678471e4831205151d8905" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "efab6e352a39a20ddaa989d800a1b579" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21635072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 21626880 |
|
} |
|
], |
|
"md5sum": "1d17b0cc46db7f5bbd6934d40bdfc5f5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1f623d9db7907f71b965b136fc5fa5a9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22102016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14680064 |
|
}, |
|
{ |
|
"name": "model.layers.0.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.0.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 22085632 |
|
}, |
|
{ |
|
"name": "model.layers.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 22093824 |
|
} |
|
], |
|
"md5sum": "8133a23122264f9cc511a71bcf259dcd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6caf630335dfcc06a8d63fc869a874d8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21626880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
} |
|
], |
|
"md5sum": "c94c9445e531a10b00f636efe63eeb3a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "47514d592b58d3b0291be3ac9e15dfba" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22085632, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.1.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14680064 |
|
}, |
|
{ |
|
"name": "model.layers.1.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 22020096 |
|
} |
|
], |
|
"md5sum": "eb3504f1c5e056acbe04db500a231910" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5281420f7acad18c6cf92cc605751772" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21643264, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
}, |
|
{ |
|
"name": "model.layers.1.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 21626880 |
|
}, |
|
{ |
|
"name": "model.layers.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 21635072 |
|
} |
|
], |
|
"md5sum": "d464e589a555bc8c09421182912203a5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "35d238b1d0d59d552a9896fb049ab156" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22102016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.2.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14680064 |
|
}, |
|
{ |
|
"name": "model.layers.2.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.2.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 22085632 |
|
}, |
|
{ |
|
"name": "model.layers.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 22093824 |
|
} |
|
], |
|
"md5sum": "b6d46c8864ccf3b79758bec4da4d0184" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "28e0431d31421918848a2eb4d707b48b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21626880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
} |
|
], |
|
"md5sum": "738919daaa02205fbce94f9f6199e49c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "307b1939c34308ea3450298b5cc3dd87" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22085632, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.3.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14680064 |
|
}, |
|
{ |
|
"name": "model.layers.3.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 22020096 |
|
} |
|
], |
|
"md5sum": "80466c47aeaa45099b033730c17c34a8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "98fa9cd94b2f7d47baa7f65b130aae50" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21626880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
} |
|
], |
|
"md5sum": "8ed191bba9ae37ad3e66731d78c77122" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d96bb408302083522a5992acf512a3b5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22085632, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14680064 |
|
}, |
|
{ |
|
"name": "model.layers.10.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 22020096 |
|
} |
|
], |
|
"md5sum": "f09453e9053a5bce9ece236ab2ce29f4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2e8670b3993cd01fa98e9a57e6c15716" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dcaf11c1b81b90f707e0c480598b77f6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28966912, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
}, |
|
{ |
|
"name": "model.layers.8.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 21626880 |
|
} |
|
], |
|
"md5sum": "00f75042249080f0081c2aca36a9b362" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2b001cb6130fd80d9df1a5ae7ad0d9d8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "019f3ea96f31183f7220b5827cc59f4f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29376512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.8.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 14680064 |
|
}, |
|
{ |
|
"name": "model.layers.8.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.9.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 14696448 |
|
} |
|
], |
|
"md5sum": "8cff9d7cd8c7d5cfb74bbdc1d923932c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "10eedc1ccd7da709745584444fc7a9b3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29065216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.9.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 7340032 |
|
}, |
|
{ |
|
"name": "model.layers.9.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 7405568 |
|
}, |
|
{ |
|
"name": "model.layers.9.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 7413760 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 7421952 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 20004864 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 20398080 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28786688 |
|
}, |
|
{ |
|
"name": "model.layers.10.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 29048832 |
|
}, |
|
{ |
|
"name": "model.layers.10.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 29057024 |
|
} |
|
], |
|
"md5sum": "a17c56b94e7a075f6a80b39475a9775d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e10c9b57828ac90c07c7533a9e2e9380" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22085632, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.11.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14680064 |
|
}, |
|
{ |
|
"name": "model.layers.11.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 22020096 |
|
} |
|
], |
|
"md5sum": "7950bfec7739d15dd58207a6dd0f9867" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "678912f9573f2e365c8b60cf9139e5d0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21643264, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
}, |
|
{ |
|
"name": "model.layers.11.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 21626880 |
|
}, |
|
{ |
|
"name": "model.layers.11.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 21635072 |
|
} |
|
], |
|
"md5sum": "60bf1b2e6246fbfba4cd98c88536e7e5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9215dba33848e89a6bfdaaa1fc40e867" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22102016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.12.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14680064 |
|
}, |
|
{ |
|
"name": "model.layers.12.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.12.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 22085632 |
|
}, |
|
{ |
|
"name": "model.layers.12.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 22093824 |
|
} |
|
], |
|
"md5sum": "ceda99e6d74c1545dbef5f45ba7473d9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1c1e9202f3d7b2e2b64b28ebde55d09b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21626880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
} |
|
], |
|
"md5sum": "d4b0827ff60ff6fb3905d15d221ca3f4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a76566e15edd4c1d3e463631f7edaec3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22085632, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.13.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14680064 |
|
}, |
|
{ |
|
"name": "model.layers.13.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 22020096 |
|
} |
|
], |
|
"md5sum": "f14efa4d35f290ea7cc7be4695898ea9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "be0a84bb50c58231678f876150ef4068" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21643264, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
}, |
|
{ |
|
"name": "model.layers.13.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 21626880 |
|
}, |
|
{ |
|
"name": "model.layers.13.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 21635072 |
|
} |
|
], |
|
"md5sum": "d6a4d56fc79d05a51f9e8d172819b6fb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "54efcdb5a51aa2757ddf4024d3c906b1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22102016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.14.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14680064 |
|
}, |
|
{ |
|
"name": "model.layers.14.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.14.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 22085632 |
|
}, |
|
{ |
|
"name": "model.layers.14.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 22093824 |
|
} |
|
], |
|
"md5sum": "5b7ff8905d9c98df28ddf12cea2dec2f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c131ddb076080b845620f4273aae01a1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21626880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
} |
|
], |
|
"md5sum": "0cae2107c3345736f8bcc8f09ad57bb9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "705b741a48ec40dc2fd5d951c368ce02" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22085632, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.15.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14680064 |
|
}, |
|
{ |
|
"name": "model.layers.15.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 22020096 |
|
} |
|
], |
|
"md5sum": "46df7e9632337685b1bddadf03a0dfdc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aff415451272f739b849f7cfa8963d91" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21643264, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
}, |
|
{ |
|
"name": "model.layers.15.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 21626880 |
|
}, |
|
{ |
|
"name": "model.layers.15.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 21635072 |
|
} |
|
], |
|
"md5sum": "2c745b4961383d131f8d4c6cc909bcd1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ef6ce6dbd35a4125a90d12d95236c961" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22085632, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.16.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14680064 |
|
}, |
|
{ |
|
"name": "model.layers.16.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 22020096 |
|
} |
|
], |
|
"md5sum": "9553f29b6e6b84911e8c4d6d8f6c1585" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "28d7243950a6b098f8646940aeed552b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21643264, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
}, |
|
{ |
|
"name": "model.layers.16.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 21626880 |
|
}, |
|
{ |
|
"name": "model.layers.16.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 21635072 |
|
} |
|
], |
|
"md5sum": "4f4a2955ae8d0c9ea7a80f8f7aa95154" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "931fb81039a0fb2638ca822015b580fe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22102016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.17.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14680064 |
|
}, |
|
{ |
|
"name": "model.layers.17.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.17.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 22085632 |
|
}, |
|
{ |
|
"name": "model.layers.17.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 22093824 |
|
} |
|
], |
|
"md5sum": "e491db355a5239eaa230f3426bce8214" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ec73a7543545bed19ca448b157387168" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21626880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
} |
|
], |
|
"md5sum": "beabef8bdda8a822bb7107ac83fb13fa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8a2c00bb9dfa2282cec85fd2615a0331" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22085632, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.18.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14680064 |
|
}, |
|
{ |
|
"name": "model.layers.18.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 22020096 |
|
} |
|
], |
|
"md5sum": "8c9b95007ebdd5458dd5721fca1092d3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "783334e283b867b4b3c6c8c433ddb7ee" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21643264, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
}, |
|
{ |
|
"name": "model.layers.18.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 21626880 |
|
}, |
|
{ |
|
"name": "model.layers.18.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 21635072 |
|
} |
|
], |
|
"md5sum": "c8586ecd44b4304f223bc62c53d7a0ef" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "59938356b53664f73423a0833a34f268" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22102016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.19.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14680064 |
|
}, |
|
{ |
|
"name": "model.layers.19.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.19.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 22085632 |
|
}, |
|
{ |
|
"name": "model.layers.19.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 22093824 |
|
} |
|
], |
|
"md5sum": "c28adc0b3276f183cbaa5b99e07c87ef" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "52a2379015dee146993bab89e0465713" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21626880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
} |
|
], |
|
"md5sum": "1ae6fa03364fe2d2ed238a00e1b5f6a3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d4f9887c5088d87c1308b3e5477889ed" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22085632, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.20.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14680064 |
|
}, |
|
{ |
|
"name": "model.layers.20.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 22020096 |
|
} |
|
], |
|
"md5sum": "922ee3a62b434e509e4b9287ad68c784" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a8eee9e487d3394853a725d60bf57461" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21643264, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
}, |
|
{ |
|
"name": "model.layers.20.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 21626880 |
|
}, |
|
{ |
|
"name": "model.layers.20.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 21635072 |
|
} |
|
], |
|
"md5sum": "ce3be756a7abeda39cffada345d01e60" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d1cf3d1c6157d483cbe3ad433ae6078c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22102016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.21.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14680064 |
|
}, |
|
{ |
|
"name": "model.layers.21.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.21.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 22085632 |
|
}, |
|
{ |
|
"name": "model.layers.21.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 22093824 |
|
} |
|
], |
|
"md5sum": "fad7df206c81ef93f4eefe4a8ed23ade" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e7d8ce1c8ee2caed046f4f347cc83dc2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21626880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
} |
|
], |
|
"md5sum": "54e776485bb5128956145f6ee0e72588" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8cd2be6ff2aa154f12b55a12ff41202f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22085632, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.22.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14680064 |
|
}, |
|
{ |
|
"name": "model.layers.22.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 22020096 |
|
} |
|
], |
|
"md5sum": "8d8a07d7d97798b955d2d8d8e967ca94" |
|
}, |
|
{ |
|
"dataPath": "params_shard_82.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e417cae68dd79e734bb0cb0961807157" |
|
}, |
|
{ |
|
"dataPath": "params_shard_83.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21643264, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
}, |
|
{ |
|
"name": "model.layers.22.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 21626880 |
|
}, |
|
{ |
|
"name": "model.layers.22.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 21635072 |
|
} |
|
], |
|
"md5sum": "06949d6c2f0d8eada0226a4c32156e8e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_84.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fa77c5bb0268adfc658f756d8b57e344" |
|
}, |
|
{ |
|
"dataPath": "params_shard_85.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22085632, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.23.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14680064 |
|
}, |
|
{ |
|
"name": "model.layers.23.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 22020096 |
|
} |
|
], |
|
"md5sum": "912ba90dda7e2fd4aa9410f938978b8d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_86.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a7487b40af49d8ab2c3a0495a42ce392" |
|
}, |
|
{ |
|
"dataPath": "params_shard_87.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21643264, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
}, |
|
{ |
|
"name": "model.layers.23.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 21626880 |
|
}, |
|
{ |
|
"name": "model.layers.23.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 21635072 |
|
} |
|
], |
|
"md5sum": "fdd04151f9dce083a676bcd9cc73648c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_88.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2452bdb5384efbe3894c42f2de87f386" |
|
}, |
|
{ |
|
"dataPath": "params_shard_89.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22102016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.24.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14680064 |
|
}, |
|
{ |
|
"name": "model.layers.24.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.24.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 22085632 |
|
}, |
|
{ |
|
"name": "model.layers.24.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 22093824 |
|
} |
|
], |
|
"md5sum": "e0592f7f8933b1dcf188658436178893" |
|
}, |
|
{ |
|
"dataPath": "params_shard_90.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "28c669b9b4f15eaa296f897aed5dffdc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_91.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21626880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
} |
|
], |
|
"md5sum": "7ddba3e470fe4d8e0dbf7f12e169018f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_92.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "18da56b2cdf2c3d3f1c02053044fae4b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_93.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22085632, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.25.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14680064 |
|
}, |
|
{ |
|
"name": "model.layers.25.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 22020096 |
|
} |
|
], |
|
"md5sum": "be604e69a48be6fe5853d9291e3fe786" |
|
}, |
|
{ |
|
"dataPath": "params_shard_94.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8ca510951e5c363b16fde7b290c4e95c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_95.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21643264, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
}, |
|
{ |
|
"name": "model.layers.25.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 21626880 |
|
}, |
|
{ |
|
"name": "model.layers.25.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 21635072 |
|
} |
|
], |
|
"md5sum": "dfcdfe4edbd8789f65e9437b016c98fd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_96.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d1d8e89f9050e1371a082bab41bba357" |
|
}, |
|
{ |
|
"dataPath": "params_shard_97.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22102016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.26.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14680064 |
|
}, |
|
{ |
|
"name": "model.layers.26.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.26.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 22085632 |
|
}, |
|
{ |
|
"name": "model.layers.26.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 22093824 |
|
} |
|
], |
|
"md5sum": "6fd70b4c4c5354de6157ca8fe5d9071f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_98.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7155ceec71cf1ae48238261b7a999719" |
|
}, |
|
{ |
|
"dataPath": "params_shard_99.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21626880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
} |
|
], |
|
"md5sum": "38aaa2b5547f7ee7f309557ab56beffd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_100.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b57fc3acc2ba55b9455975693d32eea8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_101.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22085632, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.27.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14680064 |
|
}, |
|
{ |
|
"name": "model.layers.27.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 22020096 |
|
} |
|
], |
|
"md5sum": "75dc2577150a1d74d090c659a6268006" |
|
}, |
|
{ |
|
"dataPath": "params_shard_102.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dd8a89544c77dbc68dbc77dbf1f80ea0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_103.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21643264, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
}, |
|
{ |
|
"name": "model.layers.27.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 21626880 |
|
}, |
|
{ |
|
"name": "model.layers.27.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 21635072 |
|
} |
|
], |
|
"md5sum": "0818dd6b081c935cce30879e6d175276" |
|
}, |
|
{ |
|
"dataPath": "params_shard_104.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5db38a89f6f790f1e40c35964d1fabbb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_105.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22085632, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.28.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14680064 |
|
}, |
|
{ |
|
"name": "model.layers.28.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 22020096 |
|
} |
|
], |
|
"md5sum": "bff2ad72309f161570ff9a6d204ecc7f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_106.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "828b020749074f504b8bf10ee50be5dc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_107.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21643264, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
}, |
|
{ |
|
"name": "model.layers.28.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 21626880 |
|
}, |
|
{ |
|
"name": "model.layers.28.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 21635072 |
|
} |
|
], |
|
"md5sum": "fb996f7a7431888333867f6ba23383f1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_108.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2437691e2a922f980ca39d2b11f84fb0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_109.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22102016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.29.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14680064 |
|
}, |
|
{ |
|
"name": "model.layers.29.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.29.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 22085632 |
|
}, |
|
{ |
|
"name": "model.layers.29.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 22093824 |
|
} |
|
], |
|
"md5sum": "e0f3246b5677013ba96770391141643d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_110.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21692416, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
}, |
|
{ |
|
"name": "model.layers.30.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 21626880 |
|
} |
|
], |
|
"md5sum": "b44176ba80edc4cafe4f91dd0bd7791a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_111.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3b82eefff79f3995475cc68e21cabec7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_112.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21643264, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
}, |
|
{ |
|
"name": "model.layers.3.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 21626880 |
|
}, |
|
{ |
|
"name": "model.layers.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 21635072 |
|
} |
|
], |
|
"md5sum": "3dd14f4ee4c620308f6976e07d917141" |
|
}, |
|
{ |
|
"dataPath": "params_shard_113.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ee40c95ddea0e3fd424e4c0bd9d82271" |
|
}, |
|
{ |
|
"dataPath": "params_shard_114.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22102016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.4.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14680064 |
|
}, |
|
{ |
|
"name": "model.layers.4.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.4.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 22085632 |
|
}, |
|
{ |
|
"name": "model.layers.4.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 22093824 |
|
} |
|
], |
|
"md5sum": "da352f924794e6795c387782ac4d9916" |
|
}, |
|
{ |
|
"dataPath": "params_shard_115.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21692416, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
}, |
|
{ |
|
"name": "model.layers.5.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 21626880 |
|
} |
|
], |
|
"md5sum": "bd4c7b87d2157a1e57519d4e3db75426" |
|
}, |
|
{ |
|
"dataPath": "params_shard_116.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aad2593a27e3a2a56e5adc1f78528fcd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_117.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21626880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
} |
|
], |
|
"md5sum": "a65823777b5033f162aa70a77f0486b9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_118.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bfef4c9e8a80780f8fa70b77e6c31183" |
|
}, |
|
{ |
|
"dataPath": "params_shard_119.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5ea881086630d595870b2cd7346c346a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_120.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22036480, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.5.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14680064 |
|
}, |
|
{ |
|
"name": "model.layers.5.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.5.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 22028288 |
|
} |
|
], |
|
"md5sum": "3d32e5dbdad4f7357a79bbe68ca0ef93" |
|
}, |
|
{ |
|
"dataPath": "params_shard_121.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2490bcd62d1d31a21aed752aa92d4109" |
|
}, |
|
{ |
|
"dataPath": "params_shard_122.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22085632, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.6.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14680064 |
|
}, |
|
{ |
|
"name": "model.layers.6.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 22020096 |
|
} |
|
], |
|
"md5sum": "ea7d265439328f86abe6b78cc2c35149" |
|
}, |
|
{ |
|
"dataPath": "params_shard_123.bin", |
|
"format": "raw-shard", |
|
"nbytes": 469762048, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.moe.e1_e3.q_weight", |
|
"shape": [ |
|
8, |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 469762048, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ddd7e062e8cb31eab3782e35dc37ff1a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_124.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21643264, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
}, |
|
{ |
|
"name": "model.layers.6.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 21626880 |
|
}, |
|
{ |
|
"name": "model.layers.6.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 21635072 |
|
} |
|
], |
|
"md5sum": "13795ac082e97154d7ba6502c71ffe18" |
|
}, |
|
{ |
|
"dataPath": "params_shard_125.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.moe.e2.q_weight", |
|
"shape": [ |
|
8, |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "75154ec6119edd8b944bb2fbf4424d0b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_126.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22102016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.moe.e1_e3.q_scale", |
|
"shape": [ |
|
8, |
|
28672, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14680064, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.7.moe.e2.q_scale", |
|
"shape": [ |
|
8, |
|
4096, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14680064 |
|
}, |
|
{ |
|
"name": "model.layers.7.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.7.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 22085632 |
|
}, |
|
{ |
|
"name": "model.layers.7.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 22093824 |
|
} |
|
], |
|
"md5sum": "038ef9f4ef217953cb848459d7918bd5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_127.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21692416, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
}, |
|
{ |
|
"name": "model.layers.8.moe.gate.weight", |
|
"shape": [ |
|
8, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65536, |
|
"byteOffset": 21626880 |
|
} |
|
], |
|
"md5sum": "f4b87baa9b35a746997394c8bbbdae22" |
|
}, |
|
{ |
|
"dataPath": "params_shard_128.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21626880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
6144, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12976128 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
32 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 21364736 |
|
} |
|
], |
|
"md5sum": "31b7ada7d9db61d4d3a6654bc539aadb" |
|
} |
|
] |
|
} |