|
{ |
|
"metadata": { |
|
"ParamSize": 325, |
|
"ParamBytes": 4836966400.0, |
|
"BitsPerParam": 5.000929019225669 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4e539d8934b6c83f511aa8f810b18703" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "feeaf0b383b94855c5c5c6d386f53ad2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30941184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.0.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.0.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.0.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 23592960 |
|
}, |
|
{ |
|
"name": "model.layers.0.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 23601152 |
|
} |
|
], |
|
"md5sum": "fcfaf02237d5d58a635ead2a4bed7922" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "34336158c30529b439de229b5b1261c9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27279360, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 3670016 |
|
}, |
|
{ |
|
"name": "model.layers.1.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 3678208 |
|
}, |
|
{ |
|
"name": "model.layers.1.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 12066816 |
|
}, |
|
{ |
|
"name": "model.layers.1.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 13115392 |
|
}, |
|
{ |
|
"name": "model.layers.1.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 25698304 |
|
}, |
|
{ |
|
"name": "model.layers.1.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 27271168 |
|
} |
|
], |
|
"md5sum": "16139e5afafc5e4bf3de1c5a62ebe92f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "530ec243722b4bfd3d914f2406d23c52" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33038336, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.1.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 7340032 |
|
}, |
|
{ |
|
"name": "model.layers.1.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 11010048 |
|
}, |
|
{ |
|
"name": "model.layers.2.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 11018240 |
|
}, |
|
{ |
|
"name": "model.layers.2.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 19406848 |
|
}, |
|
{ |
|
"name": "model.layers.2.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 20455424 |
|
} |
|
], |
|
"md5sum": "1d2574fa780f4f5c064e1ecf6a6445cd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9617e1b605348261f8837f6a5628e50f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 189530112, |
|
"records": [ |
|
{ |
|
"name": "model.tok_embeddings.q_weight", |
|
"shape": [ |
|
92544, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 189530112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ece99fb877ca17973fa9790d17b18a3a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32604160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.2.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 1572864 |
|
}, |
|
{ |
|
"name": "model.tok_embeddings.q_scale", |
|
"shape": [ |
|
92544, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 23691264, |
|
"byteOffset": 8912896 |
|
} |
|
], |
|
"md5sum": "07234e14d3781a1a844f6207d2a7bf8d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "60a1955d05548a3e9f74c20d3034b27f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c4195ad1b5fdde1f2fa2ad4dae40635d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30941184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.10.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.10.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.10.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 23592960 |
|
}, |
|
{ |
|
"name": "model.layers.10.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 23601152 |
|
} |
|
], |
|
"md5sum": "71fd550d89f57749c5e839fce035a763" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b76828c6d0f2ddd12a200f2bc3fba3a4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27271168, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 3670016 |
|
}, |
|
{ |
|
"name": "model.layers.11.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 3678208 |
|
}, |
|
{ |
|
"name": "model.layers.11.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 12066816 |
|
}, |
|
{ |
|
"name": "model.layers.11.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 13115392 |
|
}, |
|
{ |
|
"name": "model.layers.11.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 25698304 |
|
} |
|
], |
|
"md5sum": "0b08410b447fa4616b33649d27f9f52d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b9ed774a881ca85f8f4e606e0d6d5483" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0076cfdb8aa72e6d58df09c7127eb278" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27803648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.7.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 7340032 |
|
}, |
|
{ |
|
"name": "model.layers.7.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 7348224 |
|
}, |
|
{ |
|
"name": "model.layers.7.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.7.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 18358272 |
|
}, |
|
{ |
|
"name": "model.layers.8.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 18366464 |
|
}, |
|
{ |
|
"name": "model.layers.8.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 26755072 |
|
} |
|
], |
|
"md5sum": "30876b2267b1b29869c844b98dd3beea" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "04cd5c4038c24c63ee5c1d0a4c95a704" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cac9f52d6b6b10ca14db75dc5dc5b20b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25182208, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.8.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.8.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "model.layers.8.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 14163968 |
|
}, |
|
{ |
|
"name": "model.layers.8.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 21504000 |
|
}, |
|
{ |
|
"name": "model.layers.8.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 25174016 |
|
} |
|
], |
|
"md5sum": "7e5e6a77e1c410727523d4bd5582fe64" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4019a19d9a9d539f6a4d507e41005225" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7148a051e35378ea4daa4763fefa81fb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30941184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.9.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.9.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.9.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.9.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 23592960 |
|
}, |
|
{ |
|
"name": "model.layers.9.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 23601152 |
|
} |
|
], |
|
"md5sum": "ab444011e9e8098b448375f3b0984407" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33046528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.9.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 3670016 |
|
}, |
|
{ |
|
"name": "model.layers.11.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 3678208 |
|
}, |
|
{ |
|
"name": "model.layers.11.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 3686400 |
|
} |
|
], |
|
"md5sum": "70290e0c2ac33fe6efe6950112d8ab10" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "19b3e3788e023aab28d1a18235554bb5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27279360, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.11.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 3670016 |
|
}, |
|
{ |
|
"name": "model.layers.12.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 3678208 |
|
}, |
|
{ |
|
"name": "model.layers.12.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 12066816 |
|
}, |
|
{ |
|
"name": "model.layers.12.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 13115392 |
|
}, |
|
{ |
|
"name": "model.layers.12.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 25698304 |
|
}, |
|
{ |
|
"name": "model.layers.12.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 27271168 |
|
} |
|
], |
|
"md5sum": "eb11beb1c9e5700b06f695e4b10e19c9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "704fa12d0eaac9511e5f68a0e3a4ce39" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33038336, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.12.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 7340032 |
|
}, |
|
{ |
|
"name": "model.layers.12.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 11010048 |
|
}, |
|
{ |
|
"name": "model.layers.13.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 11018240 |
|
}, |
|
{ |
|
"name": "model.layers.13.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 19406848 |
|
}, |
|
{ |
|
"name": "model.layers.13.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 20455424 |
|
} |
|
], |
|
"md5sum": "ea8937133a702143225be2a8aed7b869" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "12085db559930d58eeb982e4e96f2b1f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d0c678d83dc9d7dcb1219b52d8d505f7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22036480, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.13.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 1572864 |
|
}, |
|
{ |
|
"name": "model.layers.13.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 1581056 |
|
}, |
|
{ |
|
"name": "model.layers.13.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 8921088 |
|
}, |
|
{ |
|
"name": "model.layers.13.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 12591104 |
|
}, |
|
{ |
|
"name": "model.layers.14.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12599296 |
|
}, |
|
{ |
|
"name": "model.layers.14.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 20987904 |
|
} |
|
], |
|
"md5sum": "36f991978c68b4b3c21ee79e717bcfcd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1715786896c7a55055f42e301d6f4367" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bb8bc7de495e59031b05a17d5d1fe3b0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25182208, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.14.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.14.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "model.layers.14.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 14163968 |
|
}, |
|
{ |
|
"name": "model.layers.14.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 21504000 |
|
}, |
|
{ |
|
"name": "model.layers.14.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 25174016 |
|
} |
|
], |
|
"md5sum": "b915094a2d74945c042510e24ef79d8d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e6e9e80e09cd620350ac74db0dd42c99" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4d5f0c146ed20ffc2f0ab04308c4acb4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30941184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.15.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.15.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.15.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.15.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 23592960 |
|
}, |
|
{ |
|
"name": "model.layers.15.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 23601152 |
|
} |
|
], |
|
"md5sum": "6101c656793004d0c3abe53468c1ba34" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9ad8969f716c6cbec7bba5e28a0e650f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27279360, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.15.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 3670016 |
|
}, |
|
{ |
|
"name": "model.layers.16.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 3678208 |
|
}, |
|
{ |
|
"name": "model.layers.16.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 12066816 |
|
}, |
|
{ |
|
"name": "model.layers.16.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 13115392 |
|
}, |
|
{ |
|
"name": "model.layers.16.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 25698304 |
|
}, |
|
{ |
|
"name": "model.layers.16.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 27271168 |
|
} |
|
], |
|
"md5sum": "f3dd95f67b099821420d0f025b8fe28b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9196853fe53bd5131d678eac280e269c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33038336, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.16.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 7340032 |
|
}, |
|
{ |
|
"name": "model.layers.16.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 11010048 |
|
}, |
|
{ |
|
"name": "model.layers.17.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 11018240 |
|
}, |
|
{ |
|
"name": "model.layers.17.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 19406848 |
|
}, |
|
{ |
|
"name": "model.layers.17.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 20455424 |
|
} |
|
], |
|
"md5sum": "58f5adf8dda704ca53a073620f3abc01" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3d3152816734abc9dc50678d33f07e66" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c4e964b339ecf6ddc3368ecf13c86331" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22036480, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.17.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 1572864 |
|
}, |
|
{ |
|
"name": "model.layers.17.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 1581056 |
|
}, |
|
{ |
|
"name": "model.layers.17.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 8921088 |
|
}, |
|
{ |
|
"name": "model.layers.17.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 12591104 |
|
}, |
|
{ |
|
"name": "model.layers.18.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12599296 |
|
}, |
|
{ |
|
"name": "model.layers.18.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 20987904 |
|
} |
|
], |
|
"md5sum": "aa8c4b20de5f6b2aef64820bd667eefb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fda6ff250690ebf2a8421ec1a7909e1a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "77f0d3f89cb203b305cefe472e800c88" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25182208, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.18.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.18.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "model.layers.18.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 14163968 |
|
}, |
|
{ |
|
"name": "model.layers.18.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 21504000 |
|
}, |
|
{ |
|
"name": "model.layers.18.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 25174016 |
|
} |
|
], |
|
"md5sum": "d2776ceadf0d34c6ac191f5d66f8205e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7ec06353645eff457917722242ad5db3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "87e659fe9e9fde958047a1f2ab793aa9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30941184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.19.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.19.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.19.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.19.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 23592960 |
|
}, |
|
{ |
|
"name": "model.layers.19.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 23601152 |
|
} |
|
], |
|
"md5sum": "8ff866572b4c2a08292392f01d8ca752" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0043281f3b7e969c386fede123a5fa8a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27271168, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.19.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 3670016 |
|
}, |
|
{ |
|
"name": "model.layers.20.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 3678208 |
|
}, |
|
{ |
|
"name": "model.layers.20.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 12066816 |
|
}, |
|
{ |
|
"name": "model.layers.20.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 13115392 |
|
}, |
|
{ |
|
"name": "model.layers.20.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 25698304 |
|
} |
|
], |
|
"md5sum": "71fb7ff691f7075abf5ac59330dab8be" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "78a389cff3e2fe1adcc870fed738a2e3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33046528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.2.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 7340032 |
|
}, |
|
{ |
|
"name": "model.layers.2.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 7348224 |
|
}, |
|
{ |
|
"name": "model.layers.2.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 11018240 |
|
}, |
|
{ |
|
"name": "model.layers.3.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 11026432 |
|
}, |
|
{ |
|
"name": "model.layers.3.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 19415040 |
|
}, |
|
{ |
|
"name": "model.layers.3.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 20463616 |
|
} |
|
], |
|
"md5sum": "5dd67f4b2cab6851d0561870e04f2c7f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d972e8547b42c7af3154460764e9aa43" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d8baabad9615ce3b96b869c14da54551" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22036480, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.3.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 1572864 |
|
}, |
|
{ |
|
"name": "model.layers.3.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 1581056 |
|
}, |
|
{ |
|
"name": "model.layers.3.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 8921088 |
|
}, |
|
{ |
|
"name": "model.layers.3.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 12591104 |
|
}, |
|
{ |
|
"name": "model.layers.4.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12599296 |
|
}, |
|
{ |
|
"name": "model.layers.4.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 20987904 |
|
} |
|
], |
|
"md5sum": "d10f8920d4cb656f364e57426aa94629" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3422f6cb1368a9f8c3d718fac58fd944" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "594134f0a343a9fedeac959b10ef5561" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25182208, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.4.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.4.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "model.layers.4.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 14163968 |
|
}, |
|
{ |
|
"name": "model.layers.4.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 21504000 |
|
}, |
|
{ |
|
"name": "model.layers.4.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 25174016 |
|
} |
|
], |
|
"md5sum": "e713f3db94d7784a9cdad7994b547169" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3479c82ae4879af2f9cc5be7f2236f8d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f019722fc2af5404055e6a949939afbe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30941184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.5.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.5.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.5.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.5.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 23592960 |
|
}, |
|
{ |
|
"name": "model.layers.5.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 23601152 |
|
} |
|
], |
|
"md5sum": "ec087c107d317ed9ce7652a9e3b65c45" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f1fb576f10d2933c04d0c4f52ea3659f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27279360, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.5.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 3670016 |
|
}, |
|
{ |
|
"name": "model.layers.6.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 3678208 |
|
}, |
|
{ |
|
"name": "model.layers.6.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 12066816 |
|
}, |
|
{ |
|
"name": "model.layers.6.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 13115392 |
|
}, |
|
{ |
|
"name": "model.layers.6.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 25698304 |
|
}, |
|
{ |
|
"name": "model.layers.6.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 27271168 |
|
} |
|
], |
|
"md5sum": "f01900d0604dd696e7f8613e4293de8c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "858c9f4409d26fa013fbd18f666050b0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33038336, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.6.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 7340032 |
|
}, |
|
{ |
|
"name": "model.layers.6.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 11010048 |
|
}, |
|
{ |
|
"name": "model.layers.7.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 11018240 |
|
}, |
|
{ |
|
"name": "model.layers.7.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 19406848 |
|
}, |
|
{ |
|
"name": "model.layers.7.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 20455424 |
|
} |
|
], |
|
"md5sum": "cc096ac89112da04e942402d37c10363" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30941184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.20.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 1572864 |
|
}, |
|
{ |
|
"name": "model.layers.20.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 1581056 |
|
} |
|
], |
|
"md5sum": "7b2cbc2a063da330f727c1d3574a7407" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "51651c19b7aac5a187c59e6e2036d266" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27279360, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.20.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 3670016 |
|
}, |
|
{ |
|
"name": "model.layers.21.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 3678208 |
|
}, |
|
{ |
|
"name": "model.layers.21.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 12066816 |
|
}, |
|
{ |
|
"name": "model.layers.21.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 13115392 |
|
}, |
|
{ |
|
"name": "model.layers.21.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 25698304 |
|
}, |
|
{ |
|
"name": "model.layers.21.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 27271168 |
|
} |
|
], |
|
"md5sum": "42da7facdeb21669403ea6ff9ec1a900" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2b5aaef7dd0aa3d2068ea197597ffce3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33038336, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.21.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 7340032 |
|
}, |
|
{ |
|
"name": "model.layers.21.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 11010048 |
|
}, |
|
{ |
|
"name": "model.layers.22.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 11018240 |
|
}, |
|
{ |
|
"name": "model.layers.22.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 19406848 |
|
}, |
|
{ |
|
"name": "model.layers.22.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 20455424 |
|
} |
|
], |
|
"md5sum": "f950a6804e7d125fc5182233a9b4efb4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "61e9ba6fc81835167a9415a8d239e7fb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1d6a7b45b01805baa866533827adc63a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22036480, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.22.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 1572864 |
|
}, |
|
{ |
|
"name": "model.layers.22.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 1581056 |
|
}, |
|
{ |
|
"name": "model.layers.22.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 8921088 |
|
}, |
|
{ |
|
"name": "model.layers.22.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 12591104 |
|
}, |
|
{ |
|
"name": "model.layers.23.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12599296 |
|
}, |
|
{ |
|
"name": "model.layers.23.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 20987904 |
|
} |
|
], |
|
"md5sum": "3e921e1f003f271fd91d9e905d75f539" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bfb30177e347d00861956fe20bd720bd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d684bab08a1e01b50c1f8d35aa82444c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25182208, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.23.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.23.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "model.layers.23.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 14163968 |
|
}, |
|
{ |
|
"name": "model.layers.23.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 21504000 |
|
}, |
|
{ |
|
"name": "model.layers.23.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 25174016 |
|
} |
|
], |
|
"md5sum": "f8bf6b7c4805bd81589370e666759892" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4814495d3aa21849181182c9f56c7461" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dd00c482bf2ea60602082c760e217ed2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30941184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.24.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.24.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.24.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.24.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 23592960 |
|
}, |
|
{ |
|
"name": "model.layers.24.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 23601152 |
|
} |
|
], |
|
"md5sum": "6aca9a5982181455f6ab695f43e3e63d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_82.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2dc3f4e0bad98eb24e61f721756003c6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_83.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27279360, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.24.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 3670016 |
|
}, |
|
{ |
|
"name": "model.layers.25.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 3678208 |
|
}, |
|
{ |
|
"name": "model.layers.25.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 12066816 |
|
}, |
|
{ |
|
"name": "model.layers.25.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 13115392 |
|
}, |
|
{ |
|
"name": "model.layers.25.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 25698304 |
|
}, |
|
{ |
|
"name": "model.layers.25.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 27271168 |
|
} |
|
], |
|
"md5sum": "b88e068f612a35284e73332afaec2c8f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_84.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "470196e1836a12c4af035ddcd0cff3d8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_85.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33038336, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.25.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 7340032 |
|
}, |
|
{ |
|
"name": "model.layers.25.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 11010048 |
|
}, |
|
{ |
|
"name": "model.layers.26.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 11018240 |
|
}, |
|
{ |
|
"name": "model.layers.26.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 19406848 |
|
}, |
|
{ |
|
"name": "model.layers.26.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 20455424 |
|
} |
|
], |
|
"md5sum": "4a06d55f5e53d43f511148b55fa733e0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_86.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1b537be7a887aa7ed3635f6dcc367494" |
|
}, |
|
{ |
|
"dataPath": "params_shard_87.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3c1ed3d9d743664df0c91a333477f9d5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_88.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22036480, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.26.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 1572864 |
|
}, |
|
{ |
|
"name": "model.layers.26.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 1581056 |
|
}, |
|
{ |
|
"name": "model.layers.26.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 8921088 |
|
}, |
|
{ |
|
"name": "model.layers.26.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 12591104 |
|
}, |
|
{ |
|
"name": "model.layers.27.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12599296 |
|
}, |
|
{ |
|
"name": "model.layers.27.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 20987904 |
|
} |
|
], |
|
"md5sum": "2d482253dab5b1243946130481914231" |
|
}, |
|
{ |
|
"dataPath": "params_shard_89.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "36558dabee89ee9c45b16a51b2147c93" |
|
}, |
|
{ |
|
"dataPath": "params_shard_90.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ae2efe6107776384dcd08a919274b743" |
|
}, |
|
{ |
|
"dataPath": "params_shard_91.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25182208, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.27.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.27.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "model.layers.27.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 14163968 |
|
}, |
|
{ |
|
"name": "model.layers.27.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 21504000 |
|
}, |
|
{ |
|
"name": "model.layers.27.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 25174016 |
|
} |
|
], |
|
"md5sum": "1d5ec44b356ca6c44840a16fe87dba70" |
|
}, |
|
{ |
|
"dataPath": "params_shard_92.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dbce60abbe0edc45dca0ec8fad700908" |
|
}, |
|
{ |
|
"dataPath": "params_shard_93.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "961eca288d4a7e06514bea9ef63b0d19" |
|
}, |
|
{ |
|
"dataPath": "params_shard_94.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30941184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.28.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.28.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.28.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.28.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 23592960 |
|
}, |
|
{ |
|
"name": "model.layers.28.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 23601152 |
|
} |
|
], |
|
"md5sum": "14d14ac5b90af80d5861369fd880a557" |
|
}, |
|
{ |
|
"dataPath": "params_shard_95.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "99d00939a26b1a1d3bfafdad0730adb3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_96.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27271168, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.28.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 3670016 |
|
}, |
|
{ |
|
"name": "model.layers.29.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 3678208 |
|
}, |
|
{ |
|
"name": "model.layers.29.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 12066816 |
|
}, |
|
{ |
|
"name": "model.layers.29.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 13115392 |
|
}, |
|
{ |
|
"name": "model.layers.29.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 25698304 |
|
} |
|
], |
|
"md5sum": "4289b97d5c4c5e42e76181fc8a516bf0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_97.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ed72009a9d6ab1c93e849eb98efc0828" |
|
}, |
|
{ |
|
"dataPath": "params_shard_98.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33046528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.29.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 7340032 |
|
}, |
|
{ |
|
"name": "model.layers.29.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 7348224 |
|
}, |
|
{ |
|
"name": "model.layers.29.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 11018240 |
|
}, |
|
{ |
|
"name": "model.layers.30.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 11026432 |
|
}, |
|
{ |
|
"name": "model.layers.30.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 19415040 |
|
}, |
|
{ |
|
"name": "model.layers.30.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 20463616 |
|
} |
|
], |
|
"md5sum": "0dc7912bd4be804dcdbab36382eac92b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_99.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9f15d4264369433b484b9b6e126284e3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_100.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c3f5bfb71d791518e77f79a11d23118b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_101.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22036480, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.30.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 1572864 |
|
}, |
|
{ |
|
"name": "model.layers.30.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 1581056 |
|
}, |
|
{ |
|
"name": "model.layers.30.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 8921088 |
|
}, |
|
{ |
|
"name": "model.layers.30.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 12591104 |
|
}, |
|
{ |
|
"name": "model.layers.31.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12599296 |
|
}, |
|
{ |
|
"name": "model.layers.31.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1048576, |
|
"byteOffset": 20987904 |
|
} |
|
], |
|
"md5sum": "c19d1584b937571acc372662949c0233" |
|
}, |
|
{ |
|
"dataPath": "params_shard_102.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "01f654a159939ffa9c4333a6f2ef9ea1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_103.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "847acd83639066adff8d9bde718704a4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_104.bin", |
|
"format": "raw-shard", |
|
"nbytes": 189530112, |
|
"records": [ |
|
{ |
|
"name": "output.q_weight", |
|
"shape": [ |
|
92544, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 189530112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "065ed01010652f1deefb525fb6953785" |
|
}, |
|
{ |
|
"dataPath": "params_shard_105.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23691264, |
|
"records": [ |
|
{ |
|
"name": "output.q_scale", |
|
"shape": [ |
|
92544, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 23691264, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b4394df33e7ca3750ab376d5f1a601a9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_106.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25190400, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.31.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.31.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "model.layers.31.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 7340032, |
|
"byteOffset": 14163968 |
|
}, |
|
{ |
|
"name": "model.layers.31.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3670016, |
|
"byteOffset": 21504000 |
|
}, |
|
{ |
|
"name": "model.layers.31.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 25174016 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 25182208 |
|
} |
|
], |
|
"md5sum": "1db92a6218c6569b29d16d616de14cd1" |
|
} |
|
] |
|
} |