diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,10159 @@ +{ + "metadata": { + "ParamSize": 709, + "ParamBytes": 20481200128.0, + "BitsPerParam": 4.3530794586599075 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 389283840, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 152064, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 389283840, + "byteOffset": 0 + } + ], + "md5sum": "e34ee27d8486a6b141dcd58cef2daeed" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 48660480, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 152064, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 48660480, + "byteOffset": 0 + } + ], + "md5sum": "77f31d7f38b10daf6526334fffff5966" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "3da6818607eb828bff0bbb67094d9901" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "2a3b5b18e565893373cd2e92e8163f0d" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "26268632d0732c3a6151cb8b7231335f" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 26572800, + "records": [ + { + "name": "model.layers.58.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 0 + }, + { + "name": "model.layers.58.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 10240 + }, + { + "name": "model.layers.58.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8857600 + }, + { + "name": "model.layers.58.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26552320 + }, + { + "name": "model.layers.59.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26562560 + } + ], + "md5sum": "49cd4deab37aa71638f40bdf3446c8c0" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "0f6563da753157fe594c83c78afadfa8" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.59.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "16ae4d2f54ed7bef07dd4348672f1f28" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.59.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.59.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.59.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.59.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "509bebb72f1919fe175d856c16fd9e32" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "11b944cbda53543c8aa74802d5e6a20e" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "6c181bdb486e41c130a4dae602b31f9e" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "9c82a5764d8a542ddc80bbe2ca1fb7b5" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.60.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "c5a0c0fd11caf92895e3b6008ecf915c" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.59.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.60.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.60.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.60.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.60.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.60.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "20323d34c2b2560c477b5e5ad83ef5c0" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.61.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4cc758dd880778553b0726ccbecc81d2" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "f1ae18683c15635143436c24cd5831b8" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "89866cf3f51eaf5835e7b8d137c14dee" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.61.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "8f34190965a1794f0caa43980d97d1c3" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.60.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.60.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.61.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.61.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.61.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.61.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.61.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "0b69a19c79de3ba18f0f0eca49df1ee2" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "049a3eb118459269527743324bc323ac" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "5d115a2ffe992e32c2cab95a694f2eaa" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "95755ab55c74f9db0ec4e062814fb00c" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.62.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "7090b12aea7aeba77de4e8819c9d81e4" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.61.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.62.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.62.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.62.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.62.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.62.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "fdf0b9c24ff2152c1170bc2ee03c2293" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.63.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b6fd0096deb396689951083a6540a1ce" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "dc19607146a93ec504c0c2cd5e36c775" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "7c6e5bcb00ab6ed06e44de3f8fcf96f2" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.63.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "0f57668ed30262a87d07b2b520c5056d" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.62.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.62.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.63.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.63.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.63.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.63.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.63.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "16d506ab4cfcb941b435f51461dcd011" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 389283840, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 152064, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 389283840, + "byteOffset": 0 + } + ], + "md5sum": "2c9989b8477d59e8a22cff792fd493d1" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 48660480, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 152064, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 48660480, + "byteOffset": 0 + } + ], + "md5sum": "d8734e17849fd66ba8259fc9754c7d8c" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "61a5a4e9e8eb94950cde6dd762aff130" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "d2146f0002c749e405e295b45709b8c6" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "2dabcf03b210dc1f9f0e0c4f16345677" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.0.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "1f77912e6d7794a0064a02f294b088a0" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 25931776, + "records": [ + { + "name": "model.layers.63.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.63.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.norm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14755840 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14766080 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23613440 + }, + { + "name": "model.layers.0.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23623680 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23638016 + } + ], + "md5sum": "450898689ad86c31e15beb1f175a2df8" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a23c6416047c2e0d6194543e7323c5e3" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "7341626293f550a41d452f1a2a69cf9b" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "7c70f5d0ede73aebf7f16469919ba55e" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "72e1407dbf72169e0d2b10cad0f6b976" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.1.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.1.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "6102b47f76f349ebc0909a356e23e8f4" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "eb3321cfc77023e44eb6337323d33c4d" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "ae593f8e45a62c432447179ebc11414f" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "f01b4a68a61e1c9efc1ff58b25cc5f8e" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.2.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "e0be010091c41590f97fb04c8554a1f6" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.2.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "ad0743eca1793bf8d520f59a1ebf3794" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "ea62911b43448b4780b28d69c35d40fe" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "58e6fbd418149445d350f3d9aa029876" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "abc9a2ff1411e9f377bbbbce641c204d" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "d104a7c7271cde5ab590070f597b7bad" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.3.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.3.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "4b452b13efb1501173f5635d8d042d62" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "dd1e31724eca7766bb45cfe0d278cdc6" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "9acd21668349845f98520725162fd87c" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "445eb21bf5073949ad60f5eb5c960742" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.4.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "9a678cd5436cd008a68b9f3d20e621c0" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.4.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.4.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "5b4d7fea7b74ea661b79b68cabe570a8" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "9cf9f59c049c6e4eead79190b96162d1" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "01296928920d58656caa5cd8c8a49b06" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "1e636c5a49d4f1d08ab5fe4743519648" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.5.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "e6f9b9d69c7689e18091cbea7ce0fd45" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.5.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "53e0caefebe3d85d57f90eee18345d66" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "f36c02fa87b4ce27e0bf2d54422316f7" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "b40491ae7533051ee419d2dcc7d6c30a" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "d19e814907f7b6e03e1ea9d2c154c5d8" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.6.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "824311cebf94b788ea645d2db4bf9a8d" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.6.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.6.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "86233c51c155b031f679ba483e20b408" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "84f4b017ebd8159ef349cd35b06d39b2" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "1022390dd50540802442ebbe12dc8c43" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "970f35c85ff372df8456189c46f0659b" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "c5588e884ce556672de6460afb839e54" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.7.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.7.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "a9535eac311eeb416e10d9351b414b7e" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "d911fe74a24f642358751dc25a3a7a07" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.8.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "3a7b1bb69f497d7d86329c9987b18167" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "bc0a3717244b294c814aabc6d6c10a15" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "93c6e0c14193f6cdcfc5e5528b669070" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "61755f2dd0512e511923f8ca31842854" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "4345cec03ece6abffaaace98d57b5856" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.10.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "405f64abdb7203f5b3a9522fe4553d50" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 28215296, + "records": [ + { + "name": "model.layers.8.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.10.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 25907200 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 25921536 + } + ], + "md5sum": "6fd442aded27de8568b84004e8aa85c6" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "ffc8d92e5427c20a8f9f6ebb5cabc619" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "a73e283c98926321002664b3de1a935e" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "392f8d7128abbe0410777df39609515c" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.11.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "cdf225644bc1b3be121aace401138f0d" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.11.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "387441129eb59f1eb198e39bdbb79cd8" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a28d80d6474a155d9df818bcf816423a" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "ee303a13e5a9cc585dc3a38717ea93ea" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "39132743b75f88f06e9c73d7925572f2" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "a36d02ab7f79e911c9d0e1c05b76035c" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.12.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.12.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "1230037f7ece073de193c5758d736184" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "5cc353f95b5888822609911549d297d8" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "5c0f058c0ccc7b895dad15f97762fd7c" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "794719637debf5848e7f7642d64c3742" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.13.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "9252165853ea1826bd0b0766fdcc9425" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.13.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.13.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "5ee5b0711f9a887514d91618108757c6" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "57e3f4bbb3369052e44b54a5346b4dca" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "c1ed983eb45ab6e8f3ed00721e17cee3" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "16dd4d8ace9600610312b1800422f129" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "bdebf60bcdf5f1362f996788d8a08f05" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.14.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.14.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "3338183659e67c86059efc8bedf798cd" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "0cbcdd011773cabbac2cec6602cdd458" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "8c8cb2aaf02edd8b556e725df9495242" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "cc494d5dc09e088ce86764fe9802f6e6" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.15.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "2625fcbe96e2912639a010d0a214efc5" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.15.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.15.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "56e2f38eb4dc549777d2ef4487b17735" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "98a2d5fda2b999c3c1811ece1bc53df2" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "5fdef4c7e6c596f3d36b259da2d161ce" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "43f1d0c08c8d9fd0e7cbb7611c44ae96" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.16.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "b728cc5ef27223307bfec3f3ccbd2d60" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.16.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.16.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "88459ffccc6bdb6659ba4b0e909bf53a" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "cf9f0181cafd6b998775abb2740bd216" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "469c8c9ee6f847b7d948384f95584997" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "5582f8a1c74541eaa793c1defa066781" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.17.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "153933e0026396c2296b94d9dcf2e17f" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.17.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.17.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "c159b007fc8f00ad7ff686683d32611d" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "21db36fafdcb2a64020bb31b7e2cc901" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "c9f2997f6154c8383b44b6a8101025cf" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.18.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "cc5ffbf96891364f5b1b260d194f3e9f" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4d262b0f692ab46892b5538f6c79656e" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "16008ad09825a78bd94979284b892cd0" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "78cd756eb539cde6fe4eed5977fd255d" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "73fdee9836c6a8061b8dbc17315ebe26" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.9.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "20f3ba785ca831dc26591fe2bf0cafb2" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.9.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "6c329bd6c0a7d9c312175495804de170" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "04847b40526cb68634f93ae94cb7a6f7" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "1a1d1690e091d552403161d82d96d605" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "1fd8a9155d75754afa0e49d9376024e8" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "b52e756fdbfc79f95750d3a4abc4f5ee" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.19.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "a79ab2d5adba6be0bdaeda5260e53a4c" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 32495616, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23613440 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 23623680 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32471040 + }, + { + "name": "model.layers.19.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32481280 + } + ], + "md5sum": "5b7a3d6a9193a8a7dcba3780e425b17b" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "13e894189096b4e9ea55c43a084ef544" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "83d7827077709ec43137e111b97abfe4" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "a894475f0da34a23d918d717572d545c" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.20.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "fe634bdd4e04b896bc0996dd8c59a536" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 28215296, + "records": [ + { + "name": "model.layers.19.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 25907200 + }, + { + "name": "model.layers.20.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 25921536 + } + ], + "md5sum": "cc839d11e0bcda845c4efac797701d6e" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "041664aa66b65cec70ff7aec861476c5" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "849680969ea97f716404314b8605ad3f" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "f567cae1883f989bbf51abff859def41" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.21.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "f216ae387d2c6e71d01ef1502a7cf358" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.21.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.21.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "d55932caf21b594d8d7c8ef73625db67" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "39af219178807eb8d29d52ff1705cf82" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "4e89ae9a1b6f1cdf45014f6e84422fc6" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "1818a93da67e2d6b3e4114d9a140dc26" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.22.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "4262fa4747688090191d637c15a8b4f9" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.22.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.22.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "f0c27f6d099ec778f4c07b5e6a2d9b6c" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "13226242710a082d56690d922b33e91a" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "192dc3fd442b804ccc14821684136d28" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "34352a22491ba30039147c54871cde94" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "a123ac620ce6e739df00522e61cf5a95" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.23.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "e33f0ea529407fef40d6ed6319a024f4" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "945602ba56c1d6630cc578dbbc34b97d" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "dfe2f75e8f897f6c02e35190b2dd0f22" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "5e9692f13a21fd42384ef0484a987096" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.24.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "7ff14a5e6bc256a258a6f7e20f5a66ae" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.24.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.24.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "7968e1da8c75c903c40334d2ea589ac4" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b2fb22769e0e5dfde85273bdbecb5c17" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "d888eba9bd76868315f50250a338e692" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "71f6dc5369a1963a7172afacd8094369" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.25.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "fc95291eba75e8a29043d217f30b0d96" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.25.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.25.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "d3b1de2c546b32497706ceb75cf1bfed" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "fea0678f1700c39fc9527bb043db59e9" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "e71bb0321e289d2dfe9eccb8aa5c948e" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "9ea63b9134aff7dcc6383a1b6b9f5fb1" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.26.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "c081657f92e005752710c4a279722fab" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.26.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.26.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "55582a5e30dae2b17b30c11fc9678a88" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "8f22e7d37fd695f294f182fcfe21f065" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "8e37dd1cf1e4605576b06f557e2eaac8" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "084807c8cf3cf8f6f8e49cff6669650c" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.27.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "00fe021f0809690be4cac0806c439c2c" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.27.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.27.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "09c704d0a60b8ca417517929f0b27712" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "14f3c22fba2e38aed2f4bf1fe3b4f932" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "b402ccb2634031640afb690dc2c9c1e2" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.28.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "16e172325afd91d0dfaf3265754a3a17" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "08e148518eb34ed4a63a55c455eb3f6a" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "7ac140ba37a3662b1018babd67c42dee" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "37c90f7c337742a7787d6a68fdcc3bc5" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "2f58e68814b59e5a6c958af482e6baa0" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.29.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "80baf849f505fc705bed8c27d0825d13" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.29.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.29.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "c3a11fb3bcb17e0e0e776cae3f7d38fd" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "aef4e215d3a3b5ea6361960558274934" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "9410e18004ea62c8b201693e27bd52f3" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "abb22bc7c25f23374e3d18a21a4024da" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.30.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "be4e9a6a9733581f17d4d490aadbfb21" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.30.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.30.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "73e9fda8d5b5884cf7ea237f5be0802c" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c4dfb54249c505d34600453b4513ac0e" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "920770737331c7c14385cffcea3badf5" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "dccfcff06bdfcab2af4d6fcc0c7f9963" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.31.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "d50df911236f8c7ffb717db6391d35a5" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.31.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.31.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "c6f84e75d775ab53b0797181104c5005" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "6fe04c1d2f3bab97f531e1601bc1602c" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "f98900ed23e2a37144fe0edab3c26de4" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "062e231cef669c04d311ced84b63e777" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.32.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "d9abf96a4100a15c5d10e1875f82d789" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.32.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.32.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "a31c94633ee235fe2891697a01cf8623" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "ecd66b2a861cd0a0fb496f14ab31c54d" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "14620574e40a0a631aa49cafe1645d76" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "0680f1eefbedcf6f43467f4e9e1a3e12" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "a29332baa1907cf96622dd394694037e" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.33.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.33.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "389992cf395650b01b27c07069d49d63" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "57adc599990ae88965da51e0d003ff70" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "1b31c4e58f3a60cc63f15d3fb711285f" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "b53d6bd81fe3bf808e50929711ebae65" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.34.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "a06e1336aa2e5356f2cf8fb4ca17782f" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.34.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.34.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "dcddb56d1f92afddb4a8daff00770ea3" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "40226e634e224d7b612a32c6158f7a9f" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "7bd137ac5822218cbd904b831987a9c3" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "301b941759e05678939101788b2f8da7" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.35.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "cf83d27b2f726ff0757d950a46c2f63f" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.35.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.35.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "401600e48a20c25f29f0e65ea5eda1cb" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a7c87331c1ab9f1da576e7c6803eb831" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "999451717d8429965de89e94b6607529" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "6cc395a65e6e45f15b4b12ff02b97751" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.36.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "cf00ff944988bad74c020a6e8f9bc30d" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.36.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.36.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "e05702f765bccfe65044725be045122f" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "81282e664e36083f3ee69a4ddd524044" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "0627c7f758b3fab48a471262c3af88a2" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "f31bb3d946c3ca850316690830f889ec" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.37.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "1bbe8a72d5402ed5ca334aa1c1843715" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.37.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.37.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "22f19ecca21d192c74ef534774a622df" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "952a14c9b856e3ad757b27e9b0809929" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.38.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "871b092c211d0fe99e63fa98d8c0fc5a" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.38.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "8838bf24d39774616b11270872846aae" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4976d8c166889156c8456a569cc31976" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "d27e924a1b3da3d468e4cd19306e679a" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.38.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "6b3e119d0b0aad300c3e003e62787000" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "2f62b2efaeb94b9e3099b57bc5f34e19" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.39.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "e64b9e6e31f2fde147ee521107841fe6" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.39.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.39.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "5270d83c7df52157e1a65e25e5955ebc" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "3dc3a50cac99b71b7af497675ae83766" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "352b64594cda8f773ab29c788850fff6" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "f999bdae04d4337e5a4808e98d60e815" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.40.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "eb763087093e8953e07c80d15c9c4312" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.40.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.40.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "e50ba30f9ff062654affa12dd6e88c1a" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "dd017e97097175be83dfde4f5ecf35b2" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "f935eba81e7cf7e3ce6fbc48fa21ab3e" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "293395cbdf4ac5a97bd2a5efcd16dcdf" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.41.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "83d4627f0b4e655f29440f1b842f92f4" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.41.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.41.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "5f463e0cf93dd273c80bf4d0efa90780" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "0b26f1e062f530251d5e65da6b984807" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "5395c916198b2f58ba597897f98abe43" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "bbb0d36e181ba0ad20fef1b2951eee46" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.42.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "b6c5e4ce1c5379103c2fa0ba5aa1071a" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.42.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.42.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.42.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "c8001167e08963993517e64b56ccd66d" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c45f6d8df01090658fbee96e25d66547" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "d3a374e67acae5a2328ce21f4c32af8b" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "5c5b2038a9f6287a7c622135dcb9129e" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.43.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "5bc7a42f4446a8927763289a71e59bb2" + }, + { + "dataPath": "params_shard_246.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.42.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.43.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.43.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.43.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "ca5d14ce5fdd7712c83997767503fab5" + }, + { + "dataPath": "params_shard_247.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "33e09e54d612d72968e926ca91244e73" + }, + { + "dataPath": "params_shard_248.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "b901a27855965be656d73b6334b36deb" + }, + { + "dataPath": "params_shard_249.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "15a7a2596b20953cf57564c0d9df4b81" + }, + { + "dataPath": "params_shard_250.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.44.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "75f5cafc7309bfd044eccf964ad5e701" + }, + { + "dataPath": "params_shard_251.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.43.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.44.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.44.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.44.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "fba5eee5b0f1e0f26871e805f7e40fec" + }, + { + "dataPath": "params_shard_252.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "7b7ebfd601a6d1598aae982bcdd427be" + }, + { + "dataPath": "params_shard_253.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "9db33aff374b4c09323ab69b16f492fd" + }, + { + "dataPath": "params_shard_254.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "92b49ca02ca699dd0672d6d84e8b54d7" + }, + { + "dataPath": "params_shard_255.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.45.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "4d5d443090526cb55fd384e0142fefe1" + }, + { + "dataPath": "params_shard_256.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.44.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.45.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.45.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.45.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "bac9fa2376b71c05a8686e59462c05bf" + }, + { + "dataPath": "params_shard_257.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "f1a20f37d3bc5b403275e0394f239154" + }, + { + "dataPath": "params_shard_258.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "9b6e168416ab2ce64571512022d5c68b" + }, + { + "dataPath": "params_shard_259.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "f08130304dbf3f5be1495e43932351cb" + }, + { + "dataPath": "params_shard_260.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.46.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "dcf52db7557fb825869c67c33e0347a7" + }, + { + "dataPath": "params_shard_261.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.45.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.46.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.46.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.46.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "bd7340e362e49b26e04cd47cb7fa658e" + }, + { + "dataPath": "params_shard_262.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "219c153cb9fbaabf779c6e6cf811f279" + }, + { + "dataPath": "params_shard_263.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "8a382ca903b29a0b1041f64b1f77c155" + }, + { + "dataPath": "params_shard_264.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "7688f3e5c2641b73e6f40b05ca6c0b81" + }, + { + "dataPath": "params_shard_265.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.47.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "349eb007a6c3999c4cd1bde77944ac88" + }, + { + "dataPath": "params_shard_266.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.46.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.47.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.47.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.47.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "305dcccfb172c18ac6b23e840d787748" + }, + { + "dataPath": "params_shard_267.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "a849581e2d400b9e19f3d6d640a5461d" + }, + { + "dataPath": "params_shard_268.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.48.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "b51a31b72480ccfa4caa62c6daaa1444" + }, + { + "dataPath": "params_shard_269.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.47.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.48.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.48.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "bf4eb710a295adb46bccb4fe31a80866" + }, + { + "dataPath": "params_shard_270.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "d8076061d162dda161588486a7f12318" + }, + { + "dataPath": "params_shard_271.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "44a3dcd1d79c60712173edf1c611feab" + }, + { + "dataPath": "params_shard_272.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.48.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.48.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.48.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.48.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "2c0cdb131ec88bcfdc8f61d1155d4571" + }, + { + "dataPath": "params_shard_273.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "d64a8517c029052838a1fc57ed9c77cb" + }, + { + "dataPath": "params_shard_274.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.49.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "38049e575fec76c268f1121604e644e9" + }, + { + "dataPath": "params_shard_275.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.49.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.49.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.49.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "317a21cc3cf05afd4c9eab3b0d8312f4" + }, + { + "dataPath": "params_shard_276.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.50.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "7860f822044f9ffbe1f59863c9dc8017" + }, + { + "dataPath": "params_shard_277.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "391596b684418b4d60e0790761435721" + }, + { + "dataPath": "params_shard_278.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "2b2a9d0364bce37706a20e6e640e48bd" + }, + { + "dataPath": "params_shard_279.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.50.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "44a89f3c0ab22ffdc8726b57919821dd" + }, + { + "dataPath": "params_shard_280.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.49.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.49.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.50.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.50.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.50.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "522a108155c16e56ab8ee7210942551d" + }, + { + "dataPath": "params_shard_281.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "6c8b2c709d62b36ea638bd0d75720016" + }, + { + "dataPath": "params_shard_282.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "f8a1ee7ff578d089061b5f0a1bf4d959" + }, + { + "dataPath": "params_shard_283.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "53b5036be296da340f0f5439b7ad8562" + }, + { + "dataPath": "params_shard_284.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.51.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "88d3fbca7455cbe129e3eb1471162937" + }, + { + "dataPath": "params_shard_285.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.50.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.51.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.51.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.51.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "bb5c0a0b5814aa8207cb0d973476c499" + }, + { + "dataPath": "params_shard_286.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "5a7d06cb9a50035c76d1bd5c99c56bdb" + }, + { + "dataPath": "params_shard_287.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "e303155aae5178520f6a4313aeb28df6" + }, + { + "dataPath": "params_shard_288.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "21f86b7a9c8c07ddca343432af9f29f3" + }, + { + "dataPath": "params_shard_289.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.52.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "7f29f3dc72c1884d7feb4b5f894f09b6" + }, + { + "dataPath": "params_shard_290.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.51.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.52.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.52.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.52.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "005eef45385f6951993e76f831fd2c3b" + }, + { + "dataPath": "params_shard_291.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e797855be2799941136a7c87dbcd4970" + }, + { + "dataPath": "params_shard_292.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "72934b3b96e45744ef25a4f42f34a016" + }, + { + "dataPath": "params_shard_293.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "bae6066ae1bcfdd6630e983b1e1ff509" + }, + { + "dataPath": "params_shard_294.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.53.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "895a25dbcf855fe98e7dd2e04a1fcda0" + }, + { + "dataPath": "params_shard_295.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.52.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.53.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.53.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.53.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.53.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "17be5070dbc211000631ef96f0b8e83f" + }, + { + "dataPath": "params_shard_296.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "bb0b11cfc435311e85d4e374c265f40b" + }, + { + "dataPath": "params_shard_297.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "b85c1f1b5ef759cc95ebcb67c8486c9f" + }, + { + "dataPath": "params_shard_298.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "6ece4bbcd14ae5d17551f5ff7bbc8835" + }, + { + "dataPath": "params_shard_299.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.54.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "1ae1abed78552a772e18be00dc5ef2d5" + }, + { + "dataPath": "params_shard_300.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.53.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.53.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.54.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.54.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.54.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "1525e7532d03cd65467960ac4bf3ca3e" + }, + { + "dataPath": "params_shard_301.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "73a115ccb8b48c8e88a14e377d7c300c" + }, + { + "dataPath": "params_shard_302.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "b7f2379645ec1c999b8fc4473a2db479" + }, + { + "dataPath": "params_shard_303.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "eed15ab9765453213ad837318a4933ff" + }, + { + "dataPath": "params_shard_304.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.55.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "bc171f0db55215ff3be1b408db7b0b6c" + }, + { + "dataPath": "params_shard_305.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.54.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.55.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.55.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.55.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "5323c5b037a3f2d78d362e341d4b2f6e" + }, + { + "dataPath": "params_shard_306.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "cec5c20841fa2cfb586ca9471890bf4b" + }, + { + "dataPath": "params_shard_307.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "fa6e21cc92ffe5afed305177e077a115" + }, + { + "dataPath": "params_shard_308.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "d00282478b009af864589f60cb4debba" + }, + { + "dataPath": "params_shard_309.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.56.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "b1e9c536adbd12df489d417af8972272" + }, + { + "dataPath": "params_shard_310.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.55.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.56.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.56.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.56.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.56.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.56.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "c37d996b23e2d5fa3c4e4b19559f18df" + }, + { + "dataPath": "params_shard_311.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "49b4c10c584c78743b13787414a09e27" + }, + { + "dataPath": "params_shard_312.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "52331be693248cb833d35ed4a35bb891" + }, + { + "dataPath": "params_shard_313.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "fb9f3b7f43ea2b8ee8ffd4c554e3146c" + }, + { + "dataPath": "params_shard_314.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.57.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "ed1c6a63e5a8d0c81853e929f757f7ac" + }, + { + "dataPath": "params_shard_315.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.56.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.56.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.57.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.57.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.57.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.57.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.57.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "105794c4161fb6f8013bc55b92110b69" + }, + { + "dataPath": "params_shard_316.bin", + "format": "raw-shard", + "nbytes": 33110016, + "records": [ + { + "name": "model.layers.57.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.57.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.58.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 14745600 + }, + { + "name": "model.layers.58.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 14759936 + } + ], + "md5sum": "6d19b7f1b285030d74e3a7b9cc9e33ce" + }, + { + "dataPath": "params_shard_317.bin", + "format": "raw-shard", + "nbytes": 17039360, + "records": [ + { + "name": "model.layers.58.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.58.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.58.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + } + ], + "md5sum": "4a6cd517241e11106608a17e5099f034" + } + ] +} \ No newline at end of file