diff --git "a/ndarray-cache-b16.json" "b/ndarray-cache-b16.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache-b16.json" @@ -0,0 +1,4375 @@ +{ + "metadata": { + "ParamSize": 325, + "ParamBytes": 4836966400.0, + "BitsPerParam": 5.000929019225669 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.0.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "4e539d8934b6c83f511aa8f810b18703" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.0.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "feeaf0b383b94855c5c5c6d386f53ad2" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 30941184, + "records": [ + { + "name": "model.layers.0.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.0.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.0.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 9437184 + }, + { + "name": "model.layers.0.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 22020096 + }, + { + "name": "model.layers.0.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.0.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 23601152 + } + ], + "md5sum": "fcfaf02237d5d58a635ead2a4bed7922" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.1.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "34336158c30529b439de229b5b1261c9" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 27279360, + "records": [ + { + "name": "model.layers.0.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.0.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.layers.1.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3678208 + }, + { + "name": "model.layers.1.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 12066816 + }, + { + "name": "model.layers.1.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 13115392 + }, + { + "name": "model.layers.1.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 25698304 + }, + { + "name": "model.layers.1.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27271168 + } + ], + "md5sum": "16139e5afafc5e4bf3de1c5a62ebe92f" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.1.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "530ec243722b4bfd3d914f2406d23c52" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 33038336, + "records": [ + { + "name": "model.layers.1.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.1.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 7340032 + }, + { + "name": "model.layers.1.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.2.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11018240 + }, + { + "name": "model.layers.2.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19406848 + }, + { + "name": "model.layers.2.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20455424 + } + ], + "md5sum": "1d2574fa780f4f5c064e1ecf6a6445cd" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.2.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "9617e1b605348261f8837f6a5628e50f" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 189530112, + "records": [ + { + "name": "model.tok_embeddings.q_weight", + "shape": [ + 92544, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 189530112, + "byteOffset": 0 + } + ], + "md5sum": "ece99fb877ca17973fa9790d17b18a3a" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 32604160, + "records": [ + { + "name": "model.layers.2.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.2.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 1572864 + }, + { + "name": "model.tok_embeddings.q_scale", + "shape": [ + 92544, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 23691264, + "byteOffset": 8912896 + } + ], + "md5sum": "07234e14d3781a1a844f6207d2a7bf8d" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.10.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "60a1955d05548a3e9f74c20d3034b27f" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.10.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "c4195ad1b5fdde1f2fa2ad4dae40635d" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 30941184, + "records": [ + { + "name": "model.layers.10.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.10.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.10.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 9437184 + }, + { + "name": "model.layers.10.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 22020096 + }, + { + "name": "model.layers.10.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.10.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 23601152 + } + ], + "md5sum": "71fd550d89f57749c5e839fce035a763" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.11.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "b76828c6d0f2ddd12a200f2bc3fba3a4" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.10.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.10.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.layers.11.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3678208 + }, + { + "name": "model.layers.11.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 12066816 + }, + { + "name": "model.layers.11.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 13115392 + }, + { + "name": "model.layers.11.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 25698304 + } + ], + "md5sum": "0b08410b447fa4616b33649d27f9f52d" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.7.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "b9ed774a881ca85f8f4e606e0d6d5483" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.7.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "0076cfdb8aa72e6d58df09c7127eb278" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "model.layers.11.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.7.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.7.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 7348224 + }, + { + "name": "model.layers.7.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 14688256 + }, + { + "name": "model.layers.7.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18358272 + }, + { + "name": "model.layers.8.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 18366464 + }, + { + "name": "model.layers.8.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 26755072 + } + ], + "md5sum": "30876b2267b1b29869c844b98dd3beea" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.8.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "04cd5c4038c24c63ee5c1d0a4c95a704" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.8.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "cac9f52d6b6b10ca14db75dc5dc5b20b" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 25182208, + "records": [ + { + "name": "model.layers.8.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.8.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.8.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 14155776 + }, + { + "name": "model.layers.8.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 14163968 + }, + { + "name": "model.layers.8.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 21504000 + }, + { + "name": "model.layers.8.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 25174016 + } + ], + "md5sum": "7e5e6a77e1c410727523d4bd5582fe64" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.9.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "4019a19d9a9d539f6a4d507e41005225" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.9.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "7148a051e35378ea4daa4763fefa81fb" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 30941184, + "records": [ + { + "name": "model.layers.9.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.9.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.9.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 9437184 + }, + { + "name": "model.layers.9.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 22020096 + }, + { + "name": "model.layers.9.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.9.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 23601152 + } + ], + "md5sum": "ab444011e9e8098b448375f3b0984407" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.9.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.9.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.layers.11.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.layers.11.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 3686400 + } + ], + "md5sum": "70290e0c2ac33fe6efe6950112d8ab10" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.12.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "19b3e3788e023aab28d1a18235554bb5" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 27279360, + "records": [ + { + "name": "model.layers.11.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.11.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.layers.12.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3678208 + }, + { + "name": "model.layers.12.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 12066816 + }, + { + "name": "model.layers.12.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 13115392 + }, + { + "name": "model.layers.12.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 25698304 + }, + { + "name": "model.layers.12.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27271168 + } + ], + "md5sum": "eb11beb1c9e5700b06f695e4b10e19c9" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.12.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "704fa12d0eaac9511e5f68a0e3a4ce39" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 33038336, + "records": [ + { + "name": "model.layers.12.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.12.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 7340032 + }, + { + "name": "model.layers.12.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.13.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11018240 + }, + { + "name": "model.layers.13.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19406848 + }, + { + "name": "model.layers.13.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20455424 + } + ], + "md5sum": "ea8937133a702143225be2a8aed7b869" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.13.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "12085db559930d58eeb982e4e96f2b1f" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.13.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d0c678d83dc9d7dcb1219b52d8d505f7" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.13.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.13.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 1572864 + }, + { + "name": "model.layers.13.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 1581056 + }, + { + "name": "model.layers.13.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 8921088 + }, + { + "name": "model.layers.13.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 12591104 + }, + { + "name": "model.layers.14.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12599296 + }, + { + "name": "model.layers.14.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 20987904 + } + ], + "md5sum": "36f991978c68b4b3c21ee79e717bcfcd" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.14.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "1715786896c7a55055f42e301d6f4367" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.14.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "bb8bc7de495e59031b05a17d5d1fe3b0" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 25182208, + "records": [ + { + "name": "model.layers.14.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.14.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.14.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 14155776 + }, + { + "name": "model.layers.14.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 14163968 + }, + { + "name": "model.layers.14.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 21504000 + }, + { + "name": "model.layers.14.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 25174016 + } + ], + "md5sum": "b915094a2d74945c042510e24ef79d8d" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.15.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "e6e9e80e09cd620350ac74db0dd42c99" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.15.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "4d5f0c146ed20ffc2f0ab04308c4acb4" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 30941184, + "records": [ + { + "name": "model.layers.15.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.15.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.15.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 9437184 + }, + { + "name": "model.layers.15.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 22020096 + }, + { + "name": "model.layers.15.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.15.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 23601152 + } + ], + "md5sum": "6101c656793004d0c3abe53468c1ba34" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.16.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "9ad8969f716c6cbec7bba5e28a0e650f" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 27279360, + "records": [ + { + "name": "model.layers.15.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.15.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.layers.16.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3678208 + }, + { + "name": "model.layers.16.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 12066816 + }, + { + "name": "model.layers.16.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 13115392 + }, + { + "name": "model.layers.16.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 25698304 + }, + { + "name": "model.layers.16.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27271168 + } + ], + "md5sum": "f3dd95f67b099821420d0f025b8fe28b" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.16.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "9196853fe53bd5131d678eac280e269c" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 33038336, + "records": [ + { + "name": "model.layers.16.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.16.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 7340032 + }, + { + "name": "model.layers.16.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.17.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11018240 + }, + { + "name": "model.layers.17.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19406848 + }, + { + "name": "model.layers.17.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20455424 + } + ], + "md5sum": "58f5adf8dda704ca53a073620f3abc01" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.17.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "3d3152816734abc9dc50678d33f07e66" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.17.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "c4e964b339ecf6ddc3368ecf13c86331" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.17.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.17.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 1572864 + }, + { + "name": "model.layers.17.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 1581056 + }, + { + "name": "model.layers.17.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 8921088 + }, + { + "name": "model.layers.17.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 12591104 + }, + { + "name": "model.layers.18.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12599296 + }, + { + "name": "model.layers.18.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 20987904 + } + ], + "md5sum": "aa8c4b20de5f6b2aef64820bd667eefb" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.18.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "fda6ff250690ebf2a8421ec1a7909e1a" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.18.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "77f0d3f89cb203b305cefe472e800c88" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 25182208, + "records": [ + { + "name": "model.layers.18.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.18.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.18.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 14155776 + }, + { + "name": "model.layers.18.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 14163968 + }, + { + "name": "model.layers.18.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 21504000 + }, + { + "name": "model.layers.18.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 25174016 + } + ], + "md5sum": "d2776ceadf0d34c6ac191f5d66f8205e" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.19.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "7ec06353645eff457917722242ad5db3" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.19.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "87e659fe9e9fde958047a1f2ab793aa9" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 30941184, + "records": [ + { + "name": "model.layers.19.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.19.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.19.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 9437184 + }, + { + "name": "model.layers.19.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 22020096 + }, + { + "name": "model.layers.19.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.19.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 23601152 + } + ], + "md5sum": "8ff866572b4c2a08292392f01d8ca752" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.20.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "0043281f3b7e969c386fede123a5fa8a" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.19.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.19.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.layers.20.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3678208 + }, + { + "name": "model.layers.20.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 12066816 + }, + { + "name": "model.layers.20.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 13115392 + }, + { + "name": "model.layers.20.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 25698304 + } + ], + "md5sum": "71fb7ff691f7075abf5ac59330dab8be" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.2.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "78a389cff3e2fe1adcc870fed738a2e3" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.20.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.2.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.2.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 7348224 + }, + { + "name": "model.layers.2.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 11018240 + }, + { + "name": "model.layers.3.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11026432 + }, + { + "name": "model.layers.3.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19415040 + }, + { + "name": "model.layers.3.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "5dd67f4b2cab6851d0561870e04f2c7f" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.3.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "d972e8547b42c7af3154460764e9aa43" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.3.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d8baabad9615ce3b96b869c14da54551" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.3.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.3.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 1572864 + }, + { + "name": "model.layers.3.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 1581056 + }, + { + "name": "model.layers.3.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 8921088 + }, + { + "name": "model.layers.3.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 12591104 + }, + { + "name": "model.layers.4.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12599296 + }, + { + "name": "model.layers.4.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 20987904 + } + ], + "md5sum": "d10f8920d4cb656f364e57426aa94629" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.4.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "3422f6cb1368a9f8c3d718fac58fd944" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.4.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "594134f0a343a9fedeac959b10ef5561" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 25182208, + "records": [ + { + "name": "model.layers.4.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.4.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.4.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 14155776 + }, + { + "name": "model.layers.4.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 14163968 + }, + { + "name": "model.layers.4.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 21504000 + }, + { + "name": "model.layers.4.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 25174016 + } + ], + "md5sum": "e713f3db94d7784a9cdad7994b547169" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.5.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "3479c82ae4879af2f9cc5be7f2236f8d" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.5.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "f019722fc2af5404055e6a949939afbe" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 30941184, + "records": [ + { + "name": "model.layers.5.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.5.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.5.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 9437184 + }, + { + "name": "model.layers.5.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 22020096 + }, + { + "name": "model.layers.5.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.5.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 23601152 + } + ], + "md5sum": "ec087c107d317ed9ce7652a9e3b65c45" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.6.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "f1fb576f10d2933c04d0c4f52ea3659f" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 27279360, + "records": [ + { + "name": "model.layers.5.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.5.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.layers.6.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3678208 + }, + { + "name": "model.layers.6.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 12066816 + }, + { + "name": "model.layers.6.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 13115392 + }, + { + "name": "model.layers.6.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 25698304 + }, + { + "name": "model.layers.6.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27271168 + } + ], + "md5sum": "f01900d0604dd696e7f8613e4293de8c" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.6.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "858c9f4409d26fa013fbd18f666050b0" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 33038336, + "records": [ + { + "name": "model.layers.6.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.6.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 7340032 + }, + { + "name": "model.layers.6.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.7.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11018240 + }, + { + "name": "model.layers.7.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19406848 + }, + { + "name": "model.layers.7.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20455424 + } + ], + "md5sum": "cc096ac89112da04e942402d37c10363" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 30941184, + "records": [ + { + "name": "model.layers.7.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.20.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 1572864 + }, + { + "name": "model.layers.20.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 1581056 + } + ], + "md5sum": "7b2cbc2a063da330f727c1d3574a7407" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.21.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "51651c19b7aac5a187c59e6e2036d266" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 27279360, + "records": [ + { + "name": "model.layers.20.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.20.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.layers.21.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3678208 + }, + { + "name": "model.layers.21.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 12066816 + }, + { + "name": "model.layers.21.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 13115392 + }, + { + "name": "model.layers.21.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 25698304 + }, + { + "name": "model.layers.21.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27271168 + } + ], + "md5sum": "42da7facdeb21669403ea6ff9ec1a900" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.21.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "2b5aaef7dd0aa3d2068ea197597ffce3" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 33038336, + "records": [ + { + "name": "model.layers.21.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.21.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 7340032 + }, + { + "name": "model.layers.21.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.22.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11018240 + }, + { + "name": "model.layers.22.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19406848 + }, + { + "name": "model.layers.22.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20455424 + } + ], + "md5sum": "f950a6804e7d125fc5182233a9b4efb4" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.22.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "61e9ba6fc81835167a9415a8d239e7fb" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.22.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "1d6a7b45b01805baa866533827adc63a" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.22.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.22.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 1572864 + }, + { + "name": "model.layers.22.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 1581056 + }, + { + "name": "model.layers.22.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 8921088 + }, + { + "name": "model.layers.22.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 12591104 + }, + { + "name": "model.layers.23.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12599296 + }, + { + "name": "model.layers.23.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 20987904 + } + ], + "md5sum": "3e921e1f003f271fd91d9e905d75f539" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.23.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "bfb30177e347d00861956fe20bd720bd" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.23.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d684bab08a1e01b50c1f8d35aa82444c" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 25182208, + "records": [ + { + "name": "model.layers.23.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.23.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.23.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 14155776 + }, + { + "name": "model.layers.23.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 14163968 + }, + { + "name": "model.layers.23.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 21504000 + }, + { + "name": "model.layers.23.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 25174016 + } + ], + "md5sum": "f8bf6b7c4805bd81589370e666759892" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.24.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "4814495d3aa21849181182c9f56c7461" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.24.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "dd00c482bf2ea60602082c760e217ed2" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 30941184, + "records": [ + { + "name": "model.layers.24.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.24.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.24.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 9437184 + }, + { + "name": "model.layers.24.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 22020096 + }, + { + "name": "model.layers.24.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.24.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 23601152 + } + ], + "md5sum": "6aca9a5982181455f6ab695f43e3e63d" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.25.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "2dc3f4e0bad98eb24e61f721756003c6" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 27279360, + "records": [ + { + "name": "model.layers.24.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.24.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.layers.25.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3678208 + }, + { + "name": "model.layers.25.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 12066816 + }, + { + "name": "model.layers.25.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 13115392 + }, + { + "name": "model.layers.25.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 25698304 + }, + { + "name": "model.layers.25.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27271168 + } + ], + "md5sum": "b88e068f612a35284e73332afaec2c8f" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.25.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "470196e1836a12c4af035ddcd0cff3d8" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 33038336, + "records": [ + { + "name": "model.layers.25.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.25.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 7340032 + }, + { + "name": "model.layers.25.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.26.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11018240 + }, + { + "name": "model.layers.26.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19406848 + }, + { + "name": "model.layers.26.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20455424 + } + ], + "md5sum": "4a06d55f5e53d43f511148b55fa733e0" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.26.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "1b537be7a887aa7ed3635f6dcc367494" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.26.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "3c1ed3d9d743664df0c91a333477f9d5" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.26.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.26.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 1572864 + }, + { + "name": "model.layers.26.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 1581056 + }, + { + "name": "model.layers.26.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 8921088 + }, + { + "name": "model.layers.26.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 12591104 + }, + { + "name": "model.layers.27.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12599296 + }, + { + "name": "model.layers.27.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 20987904 + } + ], + "md5sum": "2d482253dab5b1243946130481914231" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.27.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "36558dabee89ee9c45b16a51b2147c93" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.27.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ae2efe6107776384dcd08a919274b743" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 25182208, + "records": [ + { + "name": "model.layers.27.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.27.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.27.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 14155776 + }, + { + "name": "model.layers.27.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 14163968 + }, + { + "name": "model.layers.27.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 21504000 + }, + { + "name": "model.layers.27.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 25174016 + } + ], + "md5sum": "1d5ec44b356ca6c44840a16fe87dba70" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.28.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "dbce60abbe0edc45dca0ec8fad700908" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.28.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "961eca288d4a7e06514bea9ef63b0d19" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 30941184, + "records": [ + { + "name": "model.layers.28.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.28.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.28.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 9437184 + }, + { + "name": "model.layers.28.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 22020096 + }, + { + "name": "model.layers.28.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.28.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 23601152 + } + ], + "md5sum": "14d14ac5b90af80d5861369fd880a557" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.29.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "99d00939a26b1a1d3bfafdad0730adb3" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.28.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.28.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.layers.29.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3678208 + }, + { + "name": "model.layers.29.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 12066816 + }, + { + "name": "model.layers.29.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 13115392 + }, + { + "name": "model.layers.29.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 25698304 + } + ], + "md5sum": "4289b97d5c4c5e42e76181fc8a516bf0" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.29.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ed72009a9d6ab1c93e849eb98efc0828" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.29.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.29.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.29.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 7348224 + }, + { + "name": "model.layers.29.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 11018240 + }, + { + "name": "model.layers.30.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11026432 + }, + { + "name": "model.layers.30.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19415040 + }, + { + "name": "model.layers.30.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "0dc7912bd4be804dcdbab36382eac92b" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.30.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "9f15d4264369433b484b9b6e126284e3" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.30.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "c3f5bfb71d791518e77f79a11d23118b" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.30.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.30.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 1572864 + }, + { + "name": "model.layers.30.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 1581056 + }, + { + "name": "model.layers.30.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 8921088 + }, + { + "name": "model.layers.30.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 12591104 + }, + { + "name": "model.layers.31.attention.wo.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12599296 + }, + { + "name": "model.layers.31.attention.wo.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 20987904 + } + ], + "md5sum": "c19d1584b937571acc372662949c0233" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.31.feed_forward.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "01f654a159939ffa9c4333a6f2ef9ea1" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.31.feed_forward.w2.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "847acd83639066adff8d9bde718704a4" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 189530112, + "records": [ + { + "name": "output.q_weight", + "shape": [ + 92544, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 189530112, + "byteOffset": 0 + } + ], + "md5sum": "065ed01010652f1deefb525fb6953785" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 23691264, + "records": [ + { + "name": "output.q_scale", + "shape": [ + 92544, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 23691264, + "byteOffset": 0 + } + ], + "md5sum": "b4394df33e7ca3750ab376d5f1a601a9" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 25190400, + "records": [ + { + "name": "model.layers.31.attention.wqkv.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.31.attention.wqkv.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.31.attention_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 14155776 + }, + { + "name": "model.layers.31.feed_forward.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 14163968 + }, + { + "name": "model.layers.31.feed_forward.w2.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 21504000 + }, + { + "name": "model.layers.31.ffn_norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 25174016 + }, + { + "name": "model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 25182208 + } + ], + "md5sum": "1db92a6218c6569b29d16d616de14cd1" + } + ] +} \ No newline at end of file