{ "metadata": { "ParamSize": 325, "ParamBytes": 4836966400.0, "BitsPerParam": 5.000929019225669 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.0.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "4e539d8934b6c83f511aa8f810b18703" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.0.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "feeaf0b383b94855c5c5c6d386f53ad2" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.0.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.0.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.0.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 9437184 }, { "name": "model.layers.0.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 22020096 }, { "name": "model.layers.0.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.0.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 23601152 } ], "md5sum": "fcfaf02237d5d58a635ead2a4bed7922" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.1.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "34336158c30529b439de229b5b1261c9" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 27279360, "records": [ { "name": "model.layers.0.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.0.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.layers.1.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3678208 }, { "name": "model.layers.1.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 12066816 }, { "name": "model.layers.1.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 13115392 }, { "name": "model.layers.1.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 25698304 }, { "name": "model.layers.1.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 27271168 } ], "md5sum": "16139e5afafc5e4bf3de1c5a62ebe92f" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.1.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "530ec243722b4bfd3d914f2406d23c52" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 33038336, "records": [ { "name": "model.layers.1.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.1.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 7340032 }, { "name": "model.layers.1.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.2.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11018240 }, { "name": "model.layers.2.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 19406848 }, { "name": "model.layers.2.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20455424 } ], "md5sum": "1d2574fa780f4f5c064e1ecf6a6445cd" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.2.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "9617e1b605348261f8837f6a5628e50f" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 189530112, "records": [ { "name": "model.tok_embeddings.q_weight", "shape": [ 92544, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 189530112, "byteOffset": 0 } ], "md5sum": "ece99fb877ca17973fa9790d17b18a3a" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 32604160, "records": [ { "name": "model.layers.2.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.2.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 1572864 }, { "name": "model.tok_embeddings.q_scale", "shape": [ 92544, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 23691264, "byteOffset": 8912896 } ], "md5sum": "07234e14d3781a1a844f6207d2a7bf8d" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.10.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "60a1955d05548a3e9f74c20d3034b27f" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.10.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c4195ad1b5fdde1f2fa2ad4dae40635d" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.10.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.10.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.10.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 9437184 }, { "name": "model.layers.10.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 22020096 }, { "name": "model.layers.10.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.10.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 23601152 } ], "md5sum": "71fd550d89f57749c5e839fce035a763" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.11.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "b76828c6d0f2ddd12a200f2bc3fba3a4" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.10.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.10.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.layers.11.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3678208 }, { "name": "model.layers.11.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 12066816 }, { "name": "model.layers.11.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 13115392 }, { "name": "model.layers.11.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 25698304 } ], "md5sum": "0b08410b447fa4616b33649d27f9f52d" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.7.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "b9ed774a881ca85f8f4e606e0d6d5483" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.7.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0076cfdb8aa72e6d58df09c7127eb278" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 27803648, "records": [ { "name": "model.layers.11.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.7.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.7.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 7348224 }, { "name": "model.layers.7.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 14688256 }, { "name": "model.layers.7.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 18358272 }, { "name": "model.layers.8.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18366464 }, { "name": "model.layers.8.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 26755072 } ], "md5sum": "30876b2267b1b29869c844b98dd3beea" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.8.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "04cd5c4038c24c63ee5c1d0a4c95a704" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.8.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "cac9f52d6b6b10ca14db75dc5dc5b20b" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 25182208, "records": [ { "name": "model.layers.8.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.8.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.8.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 14155776 }, { "name": "model.layers.8.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 14163968 }, { "name": "model.layers.8.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 21504000 }, { "name": "model.layers.8.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 25174016 } ], "md5sum": "7e5e6a77e1c410727523d4bd5582fe64" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.9.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "4019a19d9a9d539f6a4d507e41005225" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.9.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "7148a051e35378ea4daa4763fefa81fb" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.9.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.9.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.9.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 9437184 }, { "name": "model.layers.9.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 22020096 }, { "name": "model.layers.9.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.9.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 23601152 } ], "md5sum": "ab444011e9e8098b448375f3b0984407" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.9.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.9.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.layers.11.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.layers.11.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3686400 } ], "md5sum": "70290e0c2ac33fe6efe6950112d8ab10" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.12.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "19b3e3788e023aab28d1a18235554bb5" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 27279360, "records": [ { "name": "model.layers.11.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.11.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.layers.12.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3678208 }, { "name": "model.layers.12.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 12066816 }, { "name": "model.layers.12.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 13115392 }, { "name": "model.layers.12.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 25698304 }, { "name": "model.layers.12.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 27271168 } ], "md5sum": "eb11beb1c9e5700b06f695e4b10e19c9" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.12.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "704fa12d0eaac9511e5f68a0e3a4ce39" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 33038336, "records": [ { "name": "model.layers.12.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.12.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 7340032 }, { "name": "model.layers.12.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.13.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11018240 }, { "name": "model.layers.13.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 19406848 }, { "name": "model.layers.13.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20455424 } ], "md5sum": "ea8937133a702143225be2a8aed7b869" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.13.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "12085db559930d58eeb982e4e96f2b1f" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.13.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d0c678d83dc9d7dcb1219b52d8d505f7" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.13.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.13.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 1572864 }, { "name": "model.layers.13.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 1581056 }, { "name": "model.layers.13.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 8921088 }, { "name": "model.layers.13.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 12591104 }, { "name": "model.layers.14.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12599296 }, { "name": "model.layers.14.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 20987904 } ], "md5sum": "36f991978c68b4b3c21ee79e717bcfcd" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.14.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "1715786896c7a55055f42e301d6f4367" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.14.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "bb8bc7de495e59031b05a17d5d1fe3b0" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 25182208, "records": [ { "name": "model.layers.14.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.14.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.14.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 14155776 }, { "name": "model.layers.14.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 14163968 }, { "name": "model.layers.14.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 21504000 }, { "name": "model.layers.14.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 25174016 } ], "md5sum": "b915094a2d74945c042510e24ef79d8d" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.15.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "e6e9e80e09cd620350ac74db0dd42c99" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.15.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "4d5f0c146ed20ffc2f0ab04308c4acb4" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.15.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.15.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.15.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 9437184 }, { "name": "model.layers.15.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 22020096 }, { "name": "model.layers.15.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.15.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 23601152 } ], "md5sum": "6101c656793004d0c3abe53468c1ba34" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.16.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "9ad8969f716c6cbec7bba5e28a0e650f" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 27279360, "records": [ { "name": "model.layers.15.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.15.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.layers.16.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3678208 }, { "name": "model.layers.16.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 12066816 }, { "name": "model.layers.16.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 13115392 }, { "name": "model.layers.16.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 25698304 }, { "name": "model.layers.16.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 27271168 } ], "md5sum": "f3dd95f67b099821420d0f025b8fe28b" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.16.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "9196853fe53bd5131d678eac280e269c" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 33038336, "records": [ { "name": "model.layers.16.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.16.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 7340032 }, { "name": "model.layers.16.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.17.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11018240 }, { "name": "model.layers.17.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 19406848 }, { "name": "model.layers.17.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20455424 } ], "md5sum": "58f5adf8dda704ca53a073620f3abc01" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.17.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "3d3152816734abc9dc50678d33f07e66" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.17.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c4e964b339ecf6ddc3368ecf13c86331" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.17.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.17.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 1572864 }, { "name": "model.layers.17.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 1581056 }, { "name": "model.layers.17.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 8921088 }, { "name": "model.layers.17.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 12591104 }, { "name": "model.layers.18.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12599296 }, { "name": "model.layers.18.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 20987904 } ], "md5sum": "aa8c4b20de5f6b2aef64820bd667eefb" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.18.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "fda6ff250690ebf2a8421ec1a7909e1a" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.18.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "77f0d3f89cb203b305cefe472e800c88" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 25182208, "records": [ { "name": "model.layers.18.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.18.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.18.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 14155776 }, { "name": "model.layers.18.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 14163968 }, { "name": "model.layers.18.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 21504000 }, { "name": "model.layers.18.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 25174016 } ], "md5sum": "d2776ceadf0d34c6ac191f5d66f8205e" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.19.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "7ec06353645eff457917722242ad5db3" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.19.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "87e659fe9e9fde958047a1f2ab793aa9" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.19.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.19.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.19.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 9437184 }, { "name": "model.layers.19.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 22020096 }, { "name": "model.layers.19.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.19.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 23601152 } ], "md5sum": "8ff866572b4c2a08292392f01d8ca752" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.20.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "0043281f3b7e969c386fede123a5fa8a" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.19.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.19.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.layers.20.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3678208 }, { "name": "model.layers.20.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 12066816 }, { "name": "model.layers.20.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 13115392 }, { "name": "model.layers.20.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 25698304 } ], "md5sum": "71fb7ff691f7075abf5ac59330dab8be" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.2.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "78a389cff3e2fe1adcc870fed738a2e3" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.20.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.2.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.2.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 7348224 }, { "name": "model.layers.2.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 11018240 }, { "name": "model.layers.3.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11026432 }, { "name": "model.layers.3.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 19415040 }, { "name": "model.layers.3.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "5dd67f4b2cab6851d0561870e04f2c7f" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.3.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "d972e8547b42c7af3154460764e9aa43" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.3.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d8baabad9615ce3b96b869c14da54551" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.3.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.3.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 1572864 }, { "name": "model.layers.3.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 1581056 }, { "name": "model.layers.3.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 8921088 }, { "name": "model.layers.3.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 12591104 }, { "name": "model.layers.4.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12599296 }, { "name": "model.layers.4.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 20987904 } ], "md5sum": "d10f8920d4cb656f364e57426aa94629" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.4.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "3422f6cb1368a9f8c3d718fac58fd944" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.4.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "594134f0a343a9fedeac959b10ef5561" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 25182208, "records": [ { "name": "model.layers.4.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.4.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.4.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 14155776 }, { "name": "model.layers.4.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 14163968 }, { "name": "model.layers.4.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 21504000 }, { "name": "model.layers.4.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 25174016 } ], "md5sum": "e713f3db94d7784a9cdad7994b547169" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.5.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "3479c82ae4879af2f9cc5be7f2236f8d" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.5.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f019722fc2af5404055e6a949939afbe" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.5.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.5.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.5.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 9437184 }, { "name": "model.layers.5.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 22020096 }, { "name": "model.layers.5.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.5.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 23601152 } ], "md5sum": "ec087c107d317ed9ce7652a9e3b65c45" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.6.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "f1fb576f10d2933c04d0c4f52ea3659f" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 27279360, "records": [ { "name": "model.layers.5.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.5.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.layers.6.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3678208 }, { "name": "model.layers.6.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 12066816 }, { "name": "model.layers.6.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 13115392 }, { "name": "model.layers.6.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 25698304 }, { "name": "model.layers.6.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 27271168 } ], "md5sum": "f01900d0604dd696e7f8613e4293de8c" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.6.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "858c9f4409d26fa013fbd18f666050b0" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 33038336, "records": [ { "name": "model.layers.6.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.6.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 7340032 }, { "name": "model.layers.6.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.7.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11018240 }, { "name": "model.layers.7.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 19406848 }, { "name": "model.layers.7.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20455424 } ], "md5sum": "cc096ac89112da04e942402d37c10363" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.7.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.20.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 1572864 }, { "name": "model.layers.20.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 1581056 } ], "md5sum": "7b2cbc2a063da330f727c1d3574a7407" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.21.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "51651c19b7aac5a187c59e6e2036d266" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 27279360, "records": [ { "name": "model.layers.20.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.20.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.layers.21.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3678208 }, { "name": "model.layers.21.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 12066816 }, { "name": "model.layers.21.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 13115392 }, { "name": "model.layers.21.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 25698304 }, { "name": "model.layers.21.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 27271168 } ], "md5sum": "42da7facdeb21669403ea6ff9ec1a900" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.21.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2b5aaef7dd0aa3d2068ea197597ffce3" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 33038336, "records": [ { "name": "model.layers.21.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.21.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 7340032 }, { "name": "model.layers.21.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.22.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11018240 }, { "name": "model.layers.22.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 19406848 }, { "name": "model.layers.22.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20455424 } ], "md5sum": "f950a6804e7d125fc5182233a9b4efb4" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.22.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "61e9ba6fc81835167a9415a8d239e7fb" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.22.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1d6a7b45b01805baa866533827adc63a" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.22.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.22.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 1572864 }, { "name": "model.layers.22.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 1581056 }, { "name": "model.layers.22.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 8921088 }, { "name": "model.layers.22.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 12591104 }, { "name": "model.layers.23.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12599296 }, { "name": "model.layers.23.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 20987904 } ], "md5sum": "3e921e1f003f271fd91d9e905d75f539" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.23.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "bfb30177e347d00861956fe20bd720bd" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.23.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d684bab08a1e01b50c1f8d35aa82444c" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 25182208, "records": [ { "name": "model.layers.23.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.23.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.23.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 14155776 }, { "name": "model.layers.23.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 14163968 }, { "name": "model.layers.23.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 21504000 }, { "name": "model.layers.23.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 25174016 } ], "md5sum": "f8bf6b7c4805bd81589370e666759892" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.24.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "4814495d3aa21849181182c9f56c7461" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.24.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "dd00c482bf2ea60602082c760e217ed2" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.24.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.24.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.24.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 9437184 }, { "name": "model.layers.24.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 22020096 }, { "name": "model.layers.24.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.24.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 23601152 } ], "md5sum": "6aca9a5982181455f6ab695f43e3e63d" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.25.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "2dc3f4e0bad98eb24e61f721756003c6" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 27279360, "records": [ { "name": "model.layers.24.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.24.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.layers.25.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3678208 }, { "name": "model.layers.25.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 12066816 }, { "name": "model.layers.25.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 13115392 }, { "name": "model.layers.25.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 25698304 }, { "name": "model.layers.25.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 27271168 } ], "md5sum": "b88e068f612a35284e73332afaec2c8f" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.25.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "470196e1836a12c4af035ddcd0cff3d8" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 33038336, "records": [ { "name": "model.layers.25.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.25.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 7340032 }, { "name": "model.layers.25.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.26.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11018240 }, { "name": "model.layers.26.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 19406848 }, { "name": "model.layers.26.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20455424 } ], "md5sum": "4a06d55f5e53d43f511148b55fa733e0" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.26.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "1b537be7a887aa7ed3635f6dcc367494" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.26.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3c1ed3d9d743664df0c91a333477f9d5" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.26.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.26.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 1572864 }, { "name": "model.layers.26.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 1581056 }, { "name": "model.layers.26.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 8921088 }, { "name": "model.layers.26.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 12591104 }, { "name": "model.layers.27.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12599296 }, { "name": "model.layers.27.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 20987904 } ], "md5sum": "2d482253dab5b1243946130481914231" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.27.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "36558dabee89ee9c45b16a51b2147c93" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.27.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ae2efe6107776384dcd08a919274b743" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 25182208, "records": [ { "name": "model.layers.27.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.27.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.27.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 14155776 }, { "name": "model.layers.27.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 14163968 }, { "name": "model.layers.27.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 21504000 }, { "name": "model.layers.27.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 25174016 } ], "md5sum": "1d5ec44b356ca6c44840a16fe87dba70" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.28.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "dbce60abbe0edc45dca0ec8fad700908" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.28.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "961eca288d4a7e06514bea9ef63b0d19" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.28.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.28.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.28.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 9437184 }, { "name": "model.layers.28.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 22020096 }, { "name": "model.layers.28.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.28.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 23601152 } ], "md5sum": "14d14ac5b90af80d5861369fd880a557" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.29.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "99d00939a26b1a1d3bfafdad0730adb3" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.28.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.28.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.layers.29.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3678208 }, { "name": "model.layers.29.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 12066816 }, { "name": "model.layers.29.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 13115392 }, { "name": "model.layers.29.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 25698304 } ], "md5sum": "4289b97d5c4c5e42e76181fc8a516bf0" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.29.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ed72009a9d6ab1c93e849eb98efc0828" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.29.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.29.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.29.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 7348224 }, { "name": "model.layers.29.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 11018240 }, { "name": "model.layers.30.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11026432 }, { "name": "model.layers.30.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 19415040 }, { "name": "model.layers.30.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "0dc7912bd4be804dcdbab36382eac92b" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.30.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "9f15d4264369433b484b9b6e126284e3" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.30.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c3f5bfb71d791518e77f79a11d23118b" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.30.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.30.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 1572864 }, { "name": "model.layers.30.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 1581056 }, { "name": "model.layers.30.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 8921088 }, { "name": "model.layers.30.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 12591104 }, { "name": "model.layers.31.attention.wo.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12599296 }, { "name": "model.layers.31.attention.wo.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 20987904 } ], "md5sum": "c19d1584b937571acc372662949c0233" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.31.feed_forward.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "01f654a159939ffa9c4333a6f2ef9ea1" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.31.feed_forward.w2.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "847acd83639066adff8d9bde718704a4" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 189530112, "records": [ { "name": "output.q_weight", "shape": [ 92544, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 189530112, "byteOffset": 0 } ], "md5sum": "065ed01010652f1deefb525fb6953785" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 23691264, "records": [ { "name": "output.q_scale", "shape": [ 92544, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 23691264, "byteOffset": 0 } ], "md5sum": "b4394df33e7ca3750ab376d5f1a601a9" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 25190400, "records": [ { "name": "model.layers.31.attention.wqkv.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.31.attention.wqkv.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.31.attention_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 14155776 }, { "name": "model.layers.31.feed_forward.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 14163968 }, { "name": "model.layers.31.feed_forward.w2.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 21504000 }, { "name": "model.layers.31.ffn_norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 25174016 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 25182208 } ], "md5sum": "1db92a6218c6569b29d16d616de14cd1" } ] }