internlm2_5-7b-chat-q4f32_1-MLC / ndarray-cache-b16.json
riczhou's picture
Upload folder using huggingface_hub
0dbf0e4 verified
raw
history blame
146 kB
{
"metadata": {
"ParamSize": 325,
"ParamBytes": 4836966400.0,
"BitsPerParam": 5.000929019225669
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.0.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "4e539d8934b6c83f511aa8f810b18703"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.0.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "feeaf0b383b94855c5c5c6d386f53ad2"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.0.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.0.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.0.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 9437184
},
{
"name": "model.layers.0.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 22020096
},
{
"name": "model.layers.0.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.0.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 23601152
}
],
"md5sum": "fcfaf02237d5d58a635ead2a4bed7922"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.1.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "34336158c30529b439de229b5b1261c9"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.0.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.0.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.1.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3678208
},
{
"name": "model.layers.1.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 12066816
},
{
"name": "model.layers.1.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 13115392
},
{
"name": "model.layers.1.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 25698304
},
{
"name": "model.layers.1.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 27271168
}
],
"md5sum": "16139e5afafc5e4bf3de1c5a62ebe92f"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.1.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "530ec243722b4bfd3d914f2406d23c52"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 33038336,
"records": [
{
"name": "model.layers.1.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.1.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 7340032
},
{
"name": "model.layers.1.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.2.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11018240
},
{
"name": "model.layers.2.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 19406848
},
{
"name": "model.layers.2.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20455424
}
],
"md5sum": "1d2574fa780f4f5c064e1ecf6a6445cd"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.2.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "9617e1b605348261f8837f6a5628e50f"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 189530112,
"records": [
{
"name": "model.tok_embeddings.q_weight",
"shape": [
92544,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 189530112,
"byteOffset": 0
}
],
"md5sum": "ece99fb877ca17973fa9790d17b18a3a"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 32604160,
"records": [
{
"name": "model.layers.2.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.2.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 1572864
},
{
"name": "model.tok_embeddings.q_scale",
"shape": [
92544,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 23691264,
"byteOffset": 8912896
}
],
"md5sum": "07234e14d3781a1a844f6207d2a7bf8d"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.10.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "60a1955d05548a3e9f74c20d3034b27f"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.10.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "c4195ad1b5fdde1f2fa2ad4dae40635d"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.10.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.10.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.10.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 9437184
},
{
"name": "model.layers.10.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 22020096
},
{
"name": "model.layers.10.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.10.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 23601152
}
],
"md5sum": "71fd550d89f57749c5e839fce035a763"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.11.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "b76828c6d0f2ddd12a200f2bc3fba3a4"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.10.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.10.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.11.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3678208
},
{
"name": "model.layers.11.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 12066816
},
{
"name": "model.layers.11.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 13115392
},
{
"name": "model.layers.11.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 25698304
}
],
"md5sum": "0b08410b447fa4616b33649d27f9f52d"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.7.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "b9ed774a881ca85f8f4e606e0d6d5483"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.7.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "0076cfdb8aa72e6d58df09c7127eb278"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 27803648,
"records": [
{
"name": "model.layers.11.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.7.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.7.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 7348224
},
{
"name": "model.layers.7.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 14688256
},
{
"name": "model.layers.7.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 18358272
},
{
"name": "model.layers.8.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18366464
},
{
"name": "model.layers.8.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 26755072
}
],
"md5sum": "30876b2267b1b29869c844b98dd3beea"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.8.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "04cd5c4038c24c63ee5c1d0a4c95a704"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.8.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "cac9f52d6b6b10ca14db75dc5dc5b20b"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.8.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.8.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.8.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 14155776
},
{
"name": "model.layers.8.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 14163968
},
{
"name": "model.layers.8.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 21504000
},
{
"name": "model.layers.8.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 25174016
}
],
"md5sum": "7e5e6a77e1c410727523d4bd5582fe64"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.9.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "4019a19d9a9d539f6a4d507e41005225"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.9.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "7148a051e35378ea4daa4763fefa81fb"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.9.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.9.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.9.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 9437184
},
{
"name": "model.layers.9.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 22020096
},
{
"name": "model.layers.9.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.9.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 23601152
}
],
"md5sum": "ab444011e9e8098b448375f3b0984407"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.9.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.9.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.11.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3678208
},
{
"name": "model.layers.11.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 3686400
}
],
"md5sum": "70290e0c2ac33fe6efe6950112d8ab10"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.12.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "19b3e3788e023aab28d1a18235554bb5"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.11.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.11.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.12.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3678208
},
{
"name": "model.layers.12.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 12066816
},
{
"name": "model.layers.12.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 13115392
},
{
"name": "model.layers.12.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 25698304
},
{
"name": "model.layers.12.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 27271168
}
],
"md5sum": "eb11beb1c9e5700b06f695e4b10e19c9"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.12.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "704fa12d0eaac9511e5f68a0e3a4ce39"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 33038336,
"records": [
{
"name": "model.layers.12.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.12.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 7340032
},
{
"name": "model.layers.12.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.13.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11018240
},
{
"name": "model.layers.13.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 19406848
},
{
"name": "model.layers.13.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20455424
}
],
"md5sum": "ea8937133a702143225be2a8aed7b869"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.13.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "12085db559930d58eeb982e4e96f2b1f"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.13.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "d0c678d83dc9d7dcb1219b52d8d505f7"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.13.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.13.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 1572864
},
{
"name": "model.layers.13.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 1581056
},
{
"name": "model.layers.13.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 8921088
},
{
"name": "model.layers.13.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 12591104
},
{
"name": "model.layers.14.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12599296
},
{
"name": "model.layers.14.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 20987904
}
],
"md5sum": "36f991978c68b4b3c21ee79e717bcfcd"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.14.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "1715786896c7a55055f42e301d6f4367"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.14.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "bb8bc7de495e59031b05a17d5d1fe3b0"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.14.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.14.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.14.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 14155776
},
{
"name": "model.layers.14.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 14163968
},
{
"name": "model.layers.14.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 21504000
},
{
"name": "model.layers.14.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 25174016
}
],
"md5sum": "b915094a2d74945c042510e24ef79d8d"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.15.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "e6e9e80e09cd620350ac74db0dd42c99"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.15.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "4d5f0c146ed20ffc2f0ab04308c4acb4"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.15.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.15.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.15.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 9437184
},
{
"name": "model.layers.15.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 22020096
},
{
"name": "model.layers.15.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.15.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 23601152
}
],
"md5sum": "6101c656793004d0c3abe53468c1ba34"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.16.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "9ad8969f716c6cbec7bba5e28a0e650f"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.15.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.15.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.16.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3678208
},
{
"name": "model.layers.16.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 12066816
},
{
"name": "model.layers.16.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 13115392
},
{
"name": "model.layers.16.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 25698304
},
{
"name": "model.layers.16.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 27271168
}
],
"md5sum": "f3dd95f67b099821420d0f025b8fe28b"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.16.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "9196853fe53bd5131d678eac280e269c"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 33038336,
"records": [
{
"name": "model.layers.16.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.16.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 7340032
},
{
"name": "model.layers.16.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.17.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11018240
},
{
"name": "model.layers.17.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 19406848
},
{
"name": "model.layers.17.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20455424
}
],
"md5sum": "58f5adf8dda704ca53a073620f3abc01"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.17.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "3d3152816734abc9dc50678d33f07e66"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.17.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "c4e964b339ecf6ddc3368ecf13c86331"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.17.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.17.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 1572864
},
{
"name": "model.layers.17.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 1581056
},
{
"name": "model.layers.17.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 8921088
},
{
"name": "model.layers.17.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 12591104
},
{
"name": "model.layers.18.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12599296
},
{
"name": "model.layers.18.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 20987904
}
],
"md5sum": "aa8c4b20de5f6b2aef64820bd667eefb"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.18.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "fda6ff250690ebf2a8421ec1a7909e1a"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.18.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "77f0d3f89cb203b305cefe472e800c88"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.18.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.18.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.18.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 14155776
},
{
"name": "model.layers.18.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 14163968
},
{
"name": "model.layers.18.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 21504000
},
{
"name": "model.layers.18.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 25174016
}
],
"md5sum": "d2776ceadf0d34c6ac191f5d66f8205e"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.19.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "7ec06353645eff457917722242ad5db3"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.19.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "87e659fe9e9fde958047a1f2ab793aa9"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.19.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.19.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.19.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 9437184
},
{
"name": "model.layers.19.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 22020096
},
{
"name": "model.layers.19.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.19.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 23601152
}
],
"md5sum": "8ff866572b4c2a08292392f01d8ca752"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.20.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "0043281f3b7e969c386fede123a5fa8a"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.19.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.19.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.20.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3678208
},
{
"name": "model.layers.20.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 12066816
},
{
"name": "model.layers.20.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 13115392
},
{
"name": "model.layers.20.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 25698304
}
],
"md5sum": "71fb7ff691f7075abf5ac59330dab8be"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.2.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "78a389cff3e2fe1adcc870fed738a2e3"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.20.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.2.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.2.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 7348224
},
{
"name": "model.layers.2.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11018240
},
{
"name": "model.layers.3.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11026432
},
{
"name": "model.layers.3.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 19415040
},
{
"name": "model.layers.3.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "5dd67f4b2cab6851d0561870e04f2c7f"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.3.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "d972e8547b42c7af3154460764e9aa43"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.3.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "d8baabad9615ce3b96b869c14da54551"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.3.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.3.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 1572864
},
{
"name": "model.layers.3.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 1581056
},
{
"name": "model.layers.3.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 8921088
},
{
"name": "model.layers.3.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 12591104
},
{
"name": "model.layers.4.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12599296
},
{
"name": "model.layers.4.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 20987904
}
],
"md5sum": "d10f8920d4cb656f364e57426aa94629"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.4.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "3422f6cb1368a9f8c3d718fac58fd944"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.4.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "594134f0a343a9fedeac959b10ef5561"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.4.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.4.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.4.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 14155776
},
{
"name": "model.layers.4.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 14163968
},
{
"name": "model.layers.4.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 21504000
},
{
"name": "model.layers.4.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 25174016
}
],
"md5sum": "e713f3db94d7784a9cdad7994b547169"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.5.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "3479c82ae4879af2f9cc5be7f2236f8d"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.5.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "f019722fc2af5404055e6a949939afbe"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.5.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.5.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.5.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 9437184
},
{
"name": "model.layers.5.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 22020096
},
{
"name": "model.layers.5.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.5.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 23601152
}
],
"md5sum": "ec087c107d317ed9ce7652a9e3b65c45"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.6.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "f1fb576f10d2933c04d0c4f52ea3659f"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.5.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.5.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.6.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3678208
},
{
"name": "model.layers.6.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 12066816
},
{
"name": "model.layers.6.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 13115392
},
{
"name": "model.layers.6.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 25698304
},
{
"name": "model.layers.6.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 27271168
}
],
"md5sum": "f01900d0604dd696e7f8613e4293de8c"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.6.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "858c9f4409d26fa013fbd18f666050b0"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 33038336,
"records": [
{
"name": "model.layers.6.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.6.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 7340032
},
{
"name": "model.layers.6.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.7.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11018240
},
{
"name": "model.layers.7.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 19406848
},
{
"name": "model.layers.7.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20455424
}
],
"md5sum": "cc096ac89112da04e942402d37c10363"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.7.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.20.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 1572864
},
{
"name": "model.layers.20.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 1581056
}
],
"md5sum": "7b2cbc2a063da330f727c1d3574a7407"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.21.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "51651c19b7aac5a187c59e6e2036d266"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.20.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.20.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.21.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3678208
},
{
"name": "model.layers.21.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 12066816
},
{
"name": "model.layers.21.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 13115392
},
{
"name": "model.layers.21.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 25698304
},
{
"name": "model.layers.21.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 27271168
}
],
"md5sum": "42da7facdeb21669403ea6ff9ec1a900"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.21.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "2b5aaef7dd0aa3d2068ea197597ffce3"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 33038336,
"records": [
{
"name": "model.layers.21.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.21.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 7340032
},
{
"name": "model.layers.21.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.22.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11018240
},
{
"name": "model.layers.22.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 19406848
},
{
"name": "model.layers.22.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20455424
}
],
"md5sum": "f950a6804e7d125fc5182233a9b4efb4"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.22.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "61e9ba6fc81835167a9415a8d239e7fb"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.22.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "1d6a7b45b01805baa866533827adc63a"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.22.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.22.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 1572864
},
{
"name": "model.layers.22.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 1581056
},
{
"name": "model.layers.22.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 8921088
},
{
"name": "model.layers.22.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 12591104
},
{
"name": "model.layers.23.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12599296
},
{
"name": "model.layers.23.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 20987904
}
],
"md5sum": "3e921e1f003f271fd91d9e905d75f539"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.23.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "bfb30177e347d00861956fe20bd720bd"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.23.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "d684bab08a1e01b50c1f8d35aa82444c"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.23.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.23.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.23.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 14155776
},
{
"name": "model.layers.23.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 14163968
},
{
"name": "model.layers.23.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 21504000
},
{
"name": "model.layers.23.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 25174016
}
],
"md5sum": "f8bf6b7c4805bd81589370e666759892"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.24.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "4814495d3aa21849181182c9f56c7461"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.24.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "dd00c482bf2ea60602082c760e217ed2"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.24.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.24.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.24.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 9437184
},
{
"name": "model.layers.24.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 22020096
},
{
"name": "model.layers.24.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.24.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 23601152
}
],
"md5sum": "6aca9a5982181455f6ab695f43e3e63d"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.25.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "2dc3f4e0bad98eb24e61f721756003c6"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.24.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.24.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.25.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3678208
},
{
"name": "model.layers.25.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 12066816
},
{
"name": "model.layers.25.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 13115392
},
{
"name": "model.layers.25.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 25698304
},
{
"name": "model.layers.25.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 27271168
}
],
"md5sum": "b88e068f612a35284e73332afaec2c8f"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.25.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "470196e1836a12c4af035ddcd0cff3d8"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 33038336,
"records": [
{
"name": "model.layers.25.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.25.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 7340032
},
{
"name": "model.layers.25.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.26.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11018240
},
{
"name": "model.layers.26.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 19406848
},
{
"name": "model.layers.26.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20455424
}
],
"md5sum": "4a06d55f5e53d43f511148b55fa733e0"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.26.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "1b537be7a887aa7ed3635f6dcc367494"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.26.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "3c1ed3d9d743664df0c91a333477f9d5"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.26.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.26.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 1572864
},
{
"name": "model.layers.26.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 1581056
},
{
"name": "model.layers.26.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 8921088
},
{
"name": "model.layers.26.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 12591104
},
{
"name": "model.layers.27.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12599296
},
{
"name": "model.layers.27.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 20987904
}
],
"md5sum": "2d482253dab5b1243946130481914231"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.27.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "36558dabee89ee9c45b16a51b2147c93"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.27.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "ae2efe6107776384dcd08a919274b743"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.27.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.27.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.27.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 14155776
},
{
"name": "model.layers.27.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 14163968
},
{
"name": "model.layers.27.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 21504000
},
{
"name": "model.layers.27.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 25174016
}
],
"md5sum": "1d5ec44b356ca6c44840a16fe87dba70"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.28.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "dbce60abbe0edc45dca0ec8fad700908"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.28.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "961eca288d4a7e06514bea9ef63b0d19"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.28.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.28.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.28.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 9437184
},
{
"name": "model.layers.28.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 22020096
},
{
"name": "model.layers.28.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.28.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 23601152
}
],
"md5sum": "14d14ac5b90af80d5861369fd880a557"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.29.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "99d00939a26b1a1d3bfafdad0730adb3"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.28.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.28.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.29.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3678208
},
{
"name": "model.layers.29.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 12066816
},
{
"name": "model.layers.29.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 13115392
},
{
"name": "model.layers.29.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 25698304
}
],
"md5sum": "4289b97d5c4c5e42e76181fc8a516bf0"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.29.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "ed72009a9d6ab1c93e849eb98efc0828"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.29.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.29.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.29.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 7348224
},
{
"name": "model.layers.29.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11018240
},
{
"name": "model.layers.30.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11026432
},
{
"name": "model.layers.30.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 19415040
},
{
"name": "model.layers.30.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "0dc7912bd4be804dcdbab36382eac92b"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.30.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "9f15d4264369433b484b9b6e126284e3"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.30.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "c3f5bfb71d791518e77f79a11d23118b"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.30.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.30.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 1572864
},
{
"name": "model.layers.30.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 1581056
},
{
"name": "model.layers.30.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 8921088
},
{
"name": "model.layers.30.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 12591104
},
{
"name": "model.layers.31.attention.wo.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12599296
},
{
"name": "model.layers.31.attention.wo.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 20987904
}
],
"md5sum": "c19d1584b937571acc372662949c0233"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.31.feed_forward.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "01f654a159939ffa9c4333a6f2ef9ea1"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.31.feed_forward.w2.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "847acd83639066adff8d9bde718704a4"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 189530112,
"records": [
{
"name": "output.q_weight",
"shape": [
92544,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 189530112,
"byteOffset": 0
}
],
"md5sum": "065ed01010652f1deefb525fb6953785"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 23691264,
"records": [
{
"name": "output.q_scale",
"shape": [
92544,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 23691264,
"byteOffset": 0
}
],
"md5sum": "b4394df33e7ca3750ab376d5f1a601a9"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 25190400,
"records": [
{
"name": "model.layers.31.attention.wqkv.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.31.attention.wqkv.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.31.attention_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 14155776
},
{
"name": "model.layers.31.feed_forward.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 14163968
},
{
"name": "model.layers.31.feed_forward.w2.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 21504000
},
{
"name": "model.layers.31.ffn_norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 25174016
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 25182208
}
],
"md5sum": "1db92a6218c6569b29d16d616de14cd1"
}
]
}