diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,10847 @@ +{ + "metadata": { + "ParamSize": 773, + "ParamBytes": 18429667328.0, + "BitsPerParam": 3.3158450877839467 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 388956160, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 151936, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 388956160, + "byteOffset": 0 + } + ], + "md5sum": "f65e025616b638c6e579a525e7d84ade" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 48619520, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 151936, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 48619520, + "byteOffset": 0 + } + ], + "md5sum": "f6c3558bb43bec515d7f706e0d52bf0e" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "3407f6a05e62f23abce93bb46050e1fc" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "9acefaa0a06fb590388a3daf17b96681" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.63.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "71b8cf205c5b4414a9ec1459dc481d6a" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "e202f1619a3e6736385f17a943b9078c" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 32798720, + "records": [ + { + "name": "model.layers.62.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 0 + }, + { + "name": "model.layers.62.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 10240 + }, + { + "name": "model.layers.62.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8202240 + }, + { + "name": "model.layers.62.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24586240 + }, + { + "name": "model.layers.63.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24596480 + }, + { + "name": "model.layers.63.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 24606720 + } + ], + "md5sum": "6a14644832d0dad396cff57b1247c0b0" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.63.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "08f5e6f01adc87ba9ae29fdfdf58a788" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.63.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "19eed171dc0c58747134215e51650f67" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 388956160, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 151936, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 388956160, + "byteOffset": 0 + } + ], + "md5sum": "1ee7001d1f6023f0a9b279862d7add41" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 48619520, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 151936, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 48619520, + "byteOffset": 0 + } + ], + "md5sum": "aeb8cab9fe9d11913ae8e88f8d61efda" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "7659b8ad4091a08f1e04af731e24a205" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "89a9d9e90ea9c168eb8e05b25ec1c77e" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 30505472, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 0 + }, + { + "name": "model.layers.63.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16384000 + }, + { + "name": "model.layers.63.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 16394240 + }, + { + "name": "model.layers.63.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16394496 + }, + { + "name": "model.layers.63.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19671296 + }, + { + "name": "model.layers.63.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22292736 + }, + { + "name": "model.norm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 22292992 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 22303232 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 22313472 + } + ], + "md5sum": "53a66a1088f62f639b6118a9e2dae37c" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.0.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "0b40f438d5d729f7501b318fd5dea984" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "c4ae238ac16c76387a61f13334691e3a" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "4ab582faeb3912ecbd36d6539942bac2" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "33646b000be65b6b9b63fbd1077f8f9a" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 30495232, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 0 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16384000 + }, + { + "name": "model.layers.0.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 16394240 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16394496 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19671296 + }, + { + "name": "model.layers.0.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22292736 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 22292992 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 22303232 + } + ], + "md5sum": "bb0679b36f59a698f21801c4e3e7b293" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "8e726424af69ba9f6efc1db4a678c1f1" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "048f943f1d465bdfb7b7909bc02acdb1" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "70c902d47b0b6a6c18c96d6306f10e44" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 22292992, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 0 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16384000 + }, + { + "name": "model.layers.1.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 16394240 + }, + { + "name": "model.layers.1.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16394496 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19671296 + }, + { + "name": "model.layers.1.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22292736 + } + ], + "md5sum": "aa9050271fc2a30b77712c8960c374c9" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.2.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "deea0546d1b1338d7c66d1c174be3266" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "8ff22b4369523252037bc369422b7a19" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "2e4584dcbc38d34994d15a31c068316c" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "1fe043e7f6b31dd0481d5d538719da98" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 30484992, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 16384000 + }, + { + "name": "model.layers.2.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16384256 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19661056 + }, + { + "name": "model.layers.2.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22282496 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 22282752 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 22292992 + } + ], + "md5sum": "061a8be85d6ed49f984f8183374a8c05" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "b576367f1c3ab08f1be18fa52b026911" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "ebe1e4ec23babfe77286fb529cd76bf1" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 24596480, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 0 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16384000 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16394240 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 16404480 + } + ], + "md5sum": "42e200336e5fbe038a951377e079f804" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.11.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "f6006fde4c4f8d57b214a1bd18864634" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "18baaae848e9b8270f8ceeb5b68c014c" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "aa2000b011dfcc74f93c7347445a1bfa" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "6ce5fe1507fcd9e626c4508c4a04a4c3" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 30495232, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 0 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16384000 + }, + { + "name": "model.layers.11.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 16394240 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16394496 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19671296 + }, + { + "name": "model.layers.11.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22292736 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 22292992 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 22303232 + } + ], + "md5sum": "087c3fd5b5095392c755607a12dd9026" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "71cf5cf4033aaf02f1f26de52653e5ec" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "ee0ae0960c29894e75ca1cd476bec78b" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "da1e312091fd7d7539facc1681d128a6" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "576968fe7d5ecb93e16086dd6b43fb9d" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 30495232, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 0 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16384000 + }, + { + "name": "model.layers.12.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 16394240 + }, + { + "name": "model.layers.12.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16394496 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19671296 + }, + { + "name": "model.layers.12.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22292736 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 22292992 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 22303232 + } + ], + "md5sum": "a648b9894082cfdf5756c2a967d24c92" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.13.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "1affb455d629eff5e8a053b09b29656e" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "24f2e22f0b68b3855116db5f61904f36" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "b84c80f59c0c1982fa76bf0beabb5364" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 22292992, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 0 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16384000 + }, + { + "name": "model.layers.13.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 16394240 + }, + { + "name": "model.layers.13.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16394496 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19671296 + }, + { + "name": "model.layers.13.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22292736 + } + ], + "md5sum": "f8a97d3a6fa89f42fad6ae1fcaab56fa" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "8725a46ca2de2925dec89af56bc1ff4a" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "a734c9dbe74551fb414830e008ef7b2d" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.10.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "0a21a63aff176722fadfb502c8d45d67" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "b279094e339310bfceefdefd8f09c555" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "f3a8e77d5a0726e1922f7d7179c85d4b" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 28191744, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 16384000 + }, + { + "name": "model.layers.14.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16384256 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19661056 + }, + { + "name": "model.layers.14.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22282496 + }, + { + "name": "model.layers.10.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22282752 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22283008 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 25559808 + }, + { + "name": "model.layers.10.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 28181248 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28181504 + } + ], + "md5sum": "69d4e256c5b4da83e2247c6bd0615df2" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "46fc4f05d3a9ee9dee89e221e8a021fc" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "f2a1983ae71c23a5c94d94dd4de62280" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "933de89e518b95d3a1cdd7a777b05610" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 32788480, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24586240 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 24596480 + } + ], + "md5sum": "fa1476bd69b8c5a21bb395d1563fb270" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "10fc38303383e5098c96238e98312bb3" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "6986a43a58ff63c7f1cce17b75e468a2" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "94fbebdad7c135fbc63f1419d0783caa" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "b84d05a6e0d0c56a77faa809f7c68d19" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 30495232, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 0 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16384000 + }, + { + "name": "model.layers.7.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 16394240 + }, + { + "name": "model.layers.7.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16394496 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19671296 + }, + { + "name": "model.layers.7.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22292736 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 22292992 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 22303232 + } + ], + "md5sum": "1e2bce2608c50a0b6af24b6c6f87f594" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.8.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "538b1d4c50f80df8e4c41b453e1be427" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "30952542e89f7e61bc1b21929f1fdeab" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "9e1a608aecde6a56392d62be0c1662b1" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "860603a6ac274452e01fc7bb09fae36e" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 30495232, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 0 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16384000 + }, + { + "name": "model.layers.8.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 16394240 + }, + { + "name": "model.layers.8.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16394496 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19671296 + }, + { + "name": "model.layers.8.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22292736 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 22292992 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 22303232 + } + ], + "md5sum": "c239eb7bcb9dedb1f1c64f84feb91fed" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.9.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "50eabf23df96d8590d1bec4cfa4ab4bb" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "086b204b52382d1d48393f16d9d19e6b" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "295f415c453d59fb132a1ab34ab0625b" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "849735b756e31666b2391607b9bcf7c2" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 30515712, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 0 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16384000 + }, + { + "name": "model.layers.9.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 16394240 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16394496 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19671296 + }, + { + "name": "model.layers.9.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22292736 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 22292992 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 22303232 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30495232 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30505472 + } + ], + "md5sum": "06281d6ab7bd8a67eeac1198bb0411ea" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "4bea88945e11221b40914c8b039ff74f" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.15.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "7aed86e0c75a4a1bbe43b69fe632e582" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "fc55ab59f0faa93a3433b48956eec962" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "81e3a57ce959fed3bd3c005959d1d07f" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 30495232, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.15.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.15.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.15.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + } + ], + "md5sum": "ac80f779b3cf8214a4c4f235c39f5dc4" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "89d47abd77926d132407d75555a170b3" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.16.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "4565bad4c3c89135bc9c8824c20424d0" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "da05a191839380fbe535ab64febedb25" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "fe39829c478d8735dc61d2fa541b829b" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 30495232, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.16.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.16.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.16.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + } + ], + "md5sum": "6aa2c8b4a3c4e19047f96e2e06521396" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "4026a87cb81b9418fcbc974e2d61e2a6" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.17.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "89d9ba046daee6f5310cabddf96a394d" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "d3ef4bad4ec0896532e6bb69e78798b3" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "df3bdeb39db90abaeac5d8bd86e06805" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 30484992, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.17.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.17.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.17.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + } + ], + "md5sum": "14d13f8c207a828cd1168080f404c12d" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "dbd31757ef083bb4252488476cb73ed9" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "e556c2d69d44daf59df9f75d21aad6c7" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "389913fff873f252b315569498847b88" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "d6ca949388ac0f012862bb596562ff2c" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 30505472, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 16384000 + }, + { + "name": "model.layers.18.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16384256 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19661056 + }, + { + "name": "model.layers.18.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22282496 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 22282752 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 22292992 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30495232 + } + ], + "md5sum": "83ca9462bb37bec27d0e67b18f8f8d21" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "c5a2f9e0b8e06fb877592fe70267a5fd" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.19.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "917a692c361f8f64c43e9ad1909fd304" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "3a6bb171da201ffde769314a9b016a22" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "d67662c6212498becb35ba4202c73ecb" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 30495232, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.19.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.19.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.19.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + } + ], + "md5sum": "2879558d96c5f4bebaf4528d383572ab" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "210ea759d0cfc4da9c06b334e9edfc40" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.20.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "486307f6feacf3aa1c0575520b3812eb" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "e1e3502c818389a3368bc391c67f384f" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "7a2d8d1300d9f0561759c46e15f674c6" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 30495232, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.20.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.20.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.20.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + } + ], + "md5sum": "d502eb7210228de07a91ca6fffdc1133" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "aa97dbfe55fa92f5b6168141eb48bc85" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.21.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "a2de9d6e1529650f38ea850b3c0e4a90" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "b6e24f9958646c298bde3f580f1c4afa" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "67125a82f8bd04555e0db7ab5538273f" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 30484992, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.21.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.21.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.21.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + } + ], + "md5sum": "81c7cf7e75d6bcc66f3568a3cf111a5c" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.22.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "5a0206edc2ccfd555ad68e819b0ac45b" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "00376c29d7d82ba5738a41553cd3d5b9" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "199c611d89098fa5b5c7276d346fd15b" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "97e1aac4b535f3580bd5f702e570155e" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 30505472, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 16384000 + }, + { + "name": "model.layers.22.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16384256 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19661056 + }, + { + "name": "model.layers.22.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22282496 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 22282752 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 22292992 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30495232 + } + ], + "md5sum": "bf9a965048ae71f4d483816f58ac1ec3" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "dfbbe2f52c1cd4e448f22637c3a1931f" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "c298b5268d6c802b352a82c99c34e4e7" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "82cba5f1fc3d36dd606c15278db1b9f0" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "941f8aebdac48abc3f30d23625358ed4" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 30495232, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.3.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.3.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.3.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + } + ], + "md5sum": "fe83163f7aaa4c3c8a1e45f966f01337" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "bf8a6f435073c013066fd0d372ac122f" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.4.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "63312a52f9788de4beb88d53fbab0ea3" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "197f40197ba9bd3487c7adbf6463ec5d" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "2a97b3455ad99403a1b074c4208787c9" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 30495232, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.4.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.4.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.4.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + } + ], + "md5sum": "02595ffb668aa915cee8d492d3875a94" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "8324b5900cd5b963348417c118dfc40d" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.5.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "459aa08048a43231b36193c9312c53c1" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "3b186a2ffea4267182892e4e956d8be7" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.6.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "da7e0e3be05647deb6de5a190e04dd61" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 30485248, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.5.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.5.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + }, + { + "name": "model.layers.6.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484992 + } + ], + "md5sum": "9a7b9515b680a3f99b8ffeba2a670d96" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "1ba52fab379c0bf2ff728024481d0a3f" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 26880256, + "records": [ + { + "name": "model.layers.6.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 3276800 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 24248320 + }, + { + "name": "model.layers.6.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26869760 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26870016 + } + ], + "md5sum": "032b79593d071bbb8b9971d8d719d5b0" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "eeb2497ed6bdd0e99f833e639b43b731" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "0faf796c75d0879abfdca6af43168e28" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "95aa6264069148d1799545b79a0fa06f" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 32798976, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8192000 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8202240 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 8212480 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 16404480 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32788480 + }, + { + "name": "model.layers.23.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32798720 + } + ], + "md5sum": "8cc79647e50bcb2fd9d9ef2df5739766" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "f04124b9a9746073da7f76c09af94046" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 26880256, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 3276800 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 24248320 + }, + { + "name": "model.layers.23.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26869760 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26870016 + } + ], + "md5sum": "bc135851dc4858590d211b8a716ee0c4" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "fb573c5896008712d4f022de40bda4df" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.24.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "f31e60a124bec20c4d8574037acd2dcd" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "c07a840f95ced1bcdeff7dea3399e6cf" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "da88e64824d1c4a5b2e5d3a1f22f194a" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 30495232, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.24.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.24.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.24.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + } + ], + "md5sum": "f6b2ced822f0b5a12b04a13dcbdfb71b" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "d1db61c836ccb49711a0752c44229adc" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.25.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "0a70ae6c7572530de89813169a5e496d" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "d2407db92f8c0c2925953ca056076ddb" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "34f4a16ef173b9d88d9d896638cdc4cf" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 30484992, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.25.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.25.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.25.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + } + ], + "md5sum": "5fa2aee88126640345589ee38114938b" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.26.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "e9172747da1c4ed192e964571ed0d6a4" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "abdf280e7d181ae089579f683587ffaa" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "0d1918d0939aa80577d99e62e2437f2e" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "2b3071f1f14ff05551eb4df311edd533" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 30505472, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 16384000 + }, + { + "name": "model.layers.26.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16384256 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19661056 + }, + { + "name": "model.layers.26.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22282496 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 22282752 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 22292992 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30495232 + } + ], + "md5sum": "639bc66398982cb648d1b78ea0e88c66" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "f00be798bcb66ce4fc3265d4143b0fea" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.27.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "37ab3df0dc72deda8b8a43c23c0bd27a" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "9238ce770d2d6666de11b3aef4c24db0" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "be5dc382cd2702d0b45dd8053777e316" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 30495232, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.27.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.27.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.27.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + } + ], + "md5sum": "4ac19ab49a35c72bdb478bc748f4eea1" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "64a854d9525fac6aa27407717aa8da68" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "dd58552d60fabd747a033bf0116e65b5" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "56cb795dc47e1fcf4f0766156942b4d2" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "27618ddab4d386e7c6151c506fa9ced2" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 30495232, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.28.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.28.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.28.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + } + ], + "md5sum": "5a6599eb9d7661187deca434bb9cedd0" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "683a48a355c1bcd1efa7109aae7b35bd" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.29.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "0db1b00469f9833943102e238b6e60c9" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "b7da88b5bc91adb24b53b432ecfd18f3" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "f21f38fca31c6cbe86df3fb2e24ca91b" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 30484992, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.29.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.29.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.29.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + } + ], + "md5sum": "c3664c895d7d0064d0bc68d9bec7f0e1" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.30.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "afa5af6e733f19bbfb0df50855814a33" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "592b8a11a8e076fc85db9a26d413d83a" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "82b58d2df5093da1e7c5031ec8d6d258" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "586ef27e690b641e125f5a5155fbadad" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 30505472, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 16384000 + }, + { + "name": "model.layers.30.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16384256 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19661056 + }, + { + "name": "model.layers.30.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22282496 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 22282752 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 22292992 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30495232 + } + ], + "md5sum": "3a5cf523e8109a709a01996551ebb4b0" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "5e5a5a58530ea1688b14d44132540881" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.31.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "23282698c3aaa5415bc084dc5cb989e3" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "611f607d16ded8266942a599f62bf397" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "997e100ffc31e2b62518c1c2f4237693" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 30495232, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.31.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.31.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.31.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + } + ], + "md5sum": "1dfd74613c59d4a1c602e278f50b6509" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "ce950d9277e44c063ead55dfa0255ce7" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.32.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "76f5a1cfcb8833fc10baa0ae82e62d96" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "ff05f2f82c048e67bb2c3cc84c33e888" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "ba2cdf835d4dad254ce7523e28bc7d28" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 30495232, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.32.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.32.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.32.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + } + ], + "md5sum": "8c9ee38b7574bab6e079bafe247baa1f" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "be442f4adbed1f02f78d2d37eda53ecb" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "d6c8fa321ed4500b2ea03f0a6caa79f0" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "c54b4783c11cf8ff85b1079047166954" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "99626fa0503b23ce1783bb25277627f7" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 30484992, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.33.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.33.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.33.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + } + ], + "md5sum": "fbbf06f0a1b273aab50f910a12a60814" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.34.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "36e7573ab0f48da0dace79a591624f27" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "b8168dc0ed65b26a309fb241a46c3f05" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "288d016229b979de8f385b755a204dd4" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "706a761282b45c78a0a60293244747f0" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 30505472, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 0 + }, + { + "name": "model.layers.34.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 16384000 + }, + { + "name": "model.layers.34.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16384256 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19661056 + }, + { + "name": "model.layers.34.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22282496 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 22282752 + }, + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 22292992 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30495232 + } + ], + "md5sum": "dfeccbe0acf4b54491b6284d413ccf49" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "d1cc8f2e07a536f8b066d04e1f323988" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.35.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "e5d8c7fe25c82287d5ea9de47b029f3e" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "47454a1cb7e65dc4bf68a4d5a4e96fe6" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "78facac84b5f873d76406923a1d9807e" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 30495232, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.35.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.35.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.35.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + } + ], + "md5sum": "438fe1e1281600a18abd14171af833e7" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "6c70c899a516bf9093db06c7a032ba58" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.36.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "2078c4e4a9f1b26c99ad0d6e675d5df9" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "81979cfb546eeaa3b7f982aabfefee3c" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "b029f7d7396215dc33185e9b24f3a4b5" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 30495232, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.36.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.36.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.36.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + } + ], + "md5sum": "bc3d0f28c4a7f241ded50b1e9dce0158" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "71f5503f4d35f6a2ea9364bb376598d5" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.37.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "432be239942c82ce241930ebebd1ab88" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "2195910814956f6938a00058e8f39e61" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "98add44b7b2bc09c5bd165c2b8bd5065" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 30484992, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.37.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.37.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.37.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + } + ], + "md5sum": "bf9cafe790e9fe380aa1216f8efd6a8a" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.38.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "91f972262a42621936e31ae646e7c524" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "eeab27254312dd9f39c1636b62eeed16" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "3e2c77320382f3feb69e5507eaa9d471" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "ce0a4222e5ac277481d0b4ba1ede4f6e" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 30505472, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 0 + }, + { + "name": "model.layers.38.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 16384000 + }, + { + "name": "model.layers.38.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16384256 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19661056 + }, + { + "name": "model.layers.38.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22282496 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 22282752 + }, + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 22292992 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30495232 + } + ], + "md5sum": "686d71be64bc333d5743327ba9526710" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "168568c8c8757b7667bda66395bee42e" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.39.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "c298a26ecaa36073d77bff3821712d11" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "3fc70f377cac91dfad3d034c22e57546" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "e46f5f0d9049e362a3f95885f9ece0be" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 30495232, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.39.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.39.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.39.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + } + ], + "md5sum": "aa431529e54e668a2e135f5338254def" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "7338edf96b727b3be819098a48410334" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.40.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "5d1e8dc493d27aa3c9e4a639ce0e95db" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "3511cb1c4d51416bc8e222bd96fd9cb6" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "8a5991ae8e75da9e022f61b9e407980f" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 30495232, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.40.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.40.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.40.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + } + ], + "md5sum": "3757ad98f7893e3733ce53e66a7095bb" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "a00a314cb2db85127f8f6ef232d52b11" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.41.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "25a7c660c2209fa0a28e86ad00ba6d40" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "be8afca19b76f055a14cc384e5159d08" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "530956cdab5a65603fe051c031e5ecd1" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 30484992, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.41.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.41.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.41.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + } + ], + "md5sum": "fd310046c123ea12236c6e66dc37f78d" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.42.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "595da341378b12ce94066105ae4dd583" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "3d883349d94f43d215d2424a7902bd1f" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "dd7b681f682a8e254f847b60b90d9071" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "68dc4e0808536aeceddf0274e129389f" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 30505472, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 0 + }, + { + "name": "model.layers.42.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 16384000 + }, + { + "name": "model.layers.42.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16384256 + }, + { + "name": "model.layers.42.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19661056 + }, + { + "name": "model.layers.42.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22282496 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 22282752 + }, + { + "name": "model.layers.42.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 22292992 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30495232 + } + ], + "md5sum": "3e86229e046edde0acb506576c0c122c" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "5ede01957d03f35e9046dd3ab3b3a90a" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.43.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "3586fca6d765914e571cd98f25d3b74f" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "b04aed94b72bf1dc75a67616edc2324f" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "e450da402ec2aeccf4c494f87be7f21e" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 30495232, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.43.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.43.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.43.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.43.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.43.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + } + ], + "md5sum": "e2800ce39a333eee0235852cab331da6" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "502d5e3c2faac5171d59fceac9fd7d48" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.44.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "fc7e5ecd47fd05a99fef08c3aa93d4d0" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "543c79b86f59059aa92380d2c1d01129" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "0df1ebe186afd87bdfc59ba6e5208d81" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 30495232, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.44.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.44.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.44.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.44.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.44.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + } + ], + "md5sum": "9e2255cb9e5ffbda6c3834b550aed1e4" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "599f58d0872a79f6f4759f74438e6146" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.45.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "3c8c4a392cd2485d2593edb2d1da186d" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "69c6a31a9b11211bbbba1f2a946ec342" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "76ce0cc368a43d1e226843814b3e6c7e" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 30484992, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.45.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.45.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.45.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.45.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.45.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + } + ], + "md5sum": "c89c4e9fefa9cdb8008d1138bae98076" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.46.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "1c3db07224540b6c9e6fb38bfabd4529" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "3ec71e47612e3b830c9bd10877e1ec16" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "fabb228b7e0ba37f2697382868cee9cd" + }, + { + "dataPath": "params_shard_246.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "87e35a3b68c55d035fe0676a499b9a3e" + }, + { + "dataPath": "params_shard_247.bin", + "format": "raw-shard", + "nbytes": 30505472, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 0 + }, + { + "name": "model.layers.46.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 16384000 + }, + { + "name": "model.layers.46.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16384256 + }, + { + "name": "model.layers.46.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19661056 + }, + { + "name": "model.layers.46.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22282496 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 22282752 + }, + { + "name": "model.layers.46.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 22292992 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30495232 + } + ], + "md5sum": "82950e0f21d8baaa2db8df120bd7382f" + }, + { + "dataPath": "params_shard_248.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "edceb857e7fa8dcfc3d85d4e89db1ed4" + }, + { + "dataPath": "params_shard_249.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.47.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "58ef498e92d24909c40b5b01e69d1547" + }, + { + "dataPath": "params_shard_250.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "aa1592d7a0279b497557764557de36f1" + }, + { + "dataPath": "params_shard_251.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "8b83f4e55e7a55a1a54378200df61628" + }, + { + "dataPath": "params_shard_252.bin", + "format": "raw-shard", + "nbytes": 30495232, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.47.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.47.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.47.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.47.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.47.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + } + ], + "md5sum": "9756d87fa8368480bd2ff15df7e6976f" + }, + { + "dataPath": "params_shard_253.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "44ba1fa9f42b48e1c00abbedcd0d216a" + }, + { + "dataPath": "params_shard_254.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.48.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "40da50704366872c8f7efdd03440fdc0" + }, + { + "dataPath": "params_shard_255.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.48.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "72027b84faaac5e198ffe47e91ea9756" + }, + { + "dataPath": "params_shard_256.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "a184d551523b5d4489299c10547df9bc" + }, + { + "dataPath": "params_shard_257.bin", + "format": "raw-shard", + "nbytes": 30495232, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.48.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.48.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.48.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.48.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.48.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + } + ], + "md5sum": "976ef26633c3282df1849cc67dc5f4d0" + }, + { + "dataPath": "params_shard_258.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "869b2043619fdc1ce547c70a8462cfb9" + }, + { + "dataPath": "params_shard_259.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.49.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "4f014bf1c337909df1a6f3a368ea149e" + }, + { + "dataPath": "params_shard_260.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.49.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "73bcd6c99104c3d4a5b6831b8d713cef" + }, + { + "dataPath": "params_shard_261.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "ca6ac2a923dc6f3d378f998f591c4bed" + }, + { + "dataPath": "params_shard_262.bin", + "format": "raw-shard", + "nbytes": 30484992, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.49.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.49.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.49.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.49.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.49.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + } + ], + "md5sum": "49e355f748b6502559c91f4a1441881c" + }, + { + "dataPath": "params_shard_263.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.50.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "7432eea377d638581dd55ceedfc14f80" + }, + { + "dataPath": "params_shard_264.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "6412ea28b1f76ae48db1cdaf68c9ef1b" + }, + { + "dataPath": "params_shard_265.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.50.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "9b2c4faf1040f972d375de7ad93553be" + }, + { + "dataPath": "params_shard_266.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "90ee2c6954282dc3465a92595897a63d" + }, + { + "dataPath": "params_shard_267.bin", + "format": "raw-shard", + "nbytes": 30505472, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 0 + }, + { + "name": "model.layers.50.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 16384000 + }, + { + "name": "model.layers.50.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16384256 + }, + { + "name": "model.layers.50.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19661056 + }, + { + "name": "model.layers.50.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22282496 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 22282752 + }, + { + "name": "model.layers.50.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 22292992 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30495232 + } + ], + "md5sum": "921de458e4c56364886a1aba1595fdbb" + }, + { + "dataPath": "params_shard_268.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "fe4e6f79e82bfc0f5ea0185949962a74" + }, + { + "dataPath": "params_shard_269.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.51.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "1c2775b1e705ee64fd83c6f7949ecadc" + }, + { + "dataPath": "params_shard_270.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "c650d941bea968047c039b27533317f1" + }, + { + "dataPath": "params_shard_271.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "e8b63fa80d0a0ffa74f6543b71190883" + }, + { + "dataPath": "params_shard_272.bin", + "format": "raw-shard", + "nbytes": 30495232, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.51.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.51.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.51.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.51.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.51.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + } + ], + "md5sum": "03a852d560ad4e0b9ecf863a8aad805c" + }, + { + "dataPath": "params_shard_273.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "871e889b692f98b07a4ff5fb05988a75" + }, + { + "dataPath": "params_shard_274.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.52.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "d40d0b5358e811f2a6505252705ede96" + }, + { + "dataPath": "params_shard_275.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "998f39002dcac37748d5aaeecc28d762" + }, + { + "dataPath": "params_shard_276.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "18a3df6d2cc09177d1efd78952751310" + }, + { + "dataPath": "params_shard_277.bin", + "format": "raw-shard", + "nbytes": 30495232, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.52.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.52.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.52.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.52.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.52.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + } + ], + "md5sum": "7601a5f6ea6af5327557dd6245e094f6" + }, + { + "dataPath": "params_shard_278.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "b13081cfb01086a2f1c5328234498133" + }, + { + "dataPath": "params_shard_279.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.53.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "37786838a1e946bef819f57c8a502a14" + }, + { + "dataPath": "params_shard_280.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.53.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "ba2c60b3f38d7cf74b7e2e526ab235eb" + }, + { + "dataPath": "params_shard_281.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "bb33c9d65279dfa24896c6a0c54e8415" + }, + { + "dataPath": "params_shard_282.bin", + "format": "raw-shard", + "nbytes": 30484992, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.53.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.53.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.53.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.53.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.53.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.53.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + } + ], + "md5sum": "4add14f4b694e1f4d2e7a3e03a53fde1" + }, + { + "dataPath": "params_shard_283.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.54.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "32b79cedce9004f85ee604e523a202de" + }, + { + "dataPath": "params_shard_284.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "082569a436d5377acaac16a4bf1cbd4b" + }, + { + "dataPath": "params_shard_285.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "ff913515990cab7801ea954bef9274bc" + }, + { + "dataPath": "params_shard_286.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "63f83b35d35d1276d7f07874477547bb" + }, + { + "dataPath": "params_shard_287.bin", + "format": "raw-shard", + "nbytes": 30505472, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 0 + }, + { + "name": "model.layers.54.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 16384000 + }, + { + "name": "model.layers.54.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16384256 + }, + { + "name": "model.layers.54.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19661056 + }, + { + "name": "model.layers.54.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22282496 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 22282752 + }, + { + "name": "model.layers.54.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 22292992 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30495232 + } + ], + "md5sum": "df8d5d83c4acd5156118865762b76fa3" + }, + { + "dataPath": "params_shard_288.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "2d49944a12a2e68fcee4504346887545" + }, + { + "dataPath": "params_shard_289.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.55.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "c80b55d82649bd6775a6d8d9d97a6e50" + }, + { + "dataPath": "params_shard_290.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "0a2f621254adf85478c2440f30bc0b41" + }, + { + "dataPath": "params_shard_291.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "bf3c47e1189ee6a224aa412d6cb7fa73" + }, + { + "dataPath": "params_shard_292.bin", + "format": "raw-shard", + "nbytes": 30495232, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.55.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.55.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.55.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.55.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.55.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + }, + { + "name": "model.layers.56.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + } + ], + "md5sum": "9f0f975b489aa5a2bfa4105035e2f0c1" + }, + { + "dataPath": "params_shard_293.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "812bbb6280740bcebeb9e2e283d1e3c1" + }, + { + "dataPath": "params_shard_294.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.56.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "eb4c8e5296ec74587e2ad79b55f6a27c" + }, + { + "dataPath": "params_shard_295.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.56.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "84a29f1280010456bc1c817c2c09fab8" + }, + { + "dataPath": "params_shard_296.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "c819aca05124a7eb7db4bd99f5a49d05" + }, + { + "dataPath": "params_shard_297.bin", + "format": "raw-shard", + "nbytes": 30495232, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.56.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.56.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.56.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.56.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.56.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.56.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + }, + { + "name": "model.layers.57.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + } + ], + "md5sum": "55b36b73250639f6ac4761cb897e1998" + }, + { + "dataPath": "params_shard_298.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "dce95153b99ffed703eca9c9ce186600" + }, + { + "dataPath": "params_shard_299.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.57.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "5153a0bfa90af15c0222d82d85a62769" + }, + { + "dataPath": "params_shard_300.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.57.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "b298ee7a2449ea6a873b021e42c3504c" + }, + { + "dataPath": "params_shard_301.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "c0137bcc4ae6d095913e2cfd0b09695e" + }, + { + "dataPath": "params_shard_302.bin", + "format": "raw-shard", + "nbytes": 30484992, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.57.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.57.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.57.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.57.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.57.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.57.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + } + ], + "md5sum": "b6a280e74452ac44db9eb9f7ec5104b2" + }, + { + "dataPath": "params_shard_303.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.58.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "8dd0d40edca980deda1b1ec107661190" + }, + { + "dataPath": "params_shard_304.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.58.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "4767c7a8038d499593c43c8e39aea1c3" + }, + { + "dataPath": "params_shard_305.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "6c088758a5bc40fcfa12b7b3bc1c844e" + }, + { + "dataPath": "params_shard_306.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "8ddc870f3caad5e04af4e0cd72d0d7c6" + }, + { + "dataPath": "params_shard_307.bin", + "format": "raw-shard", + "nbytes": 30505472, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 0 + }, + { + "name": "model.layers.58.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 16384000 + }, + { + "name": "model.layers.58.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16384256 + }, + { + "name": "model.layers.58.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19661056 + }, + { + "name": "model.layers.58.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22282496 + }, + { + "name": "model.layers.58.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 22282752 + }, + { + "name": "model.layers.58.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 22292992 + }, + { + "name": "model.layers.58.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + }, + { + "name": "model.layers.59.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30495232 + } + ], + "md5sum": "d9815c4e5401e153bd0170adb4a56b09" + }, + { + "dataPath": "params_shard_308.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "47853831c18192de5e34a6c8991db682" + }, + { + "dataPath": "params_shard_309.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.59.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "17949f7802fa333edb41a4500915f040" + }, + { + "dataPath": "params_shard_310.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "75c3d25474e94f4a0936b4bc452873c6" + }, + { + "dataPath": "params_shard_311.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "d18b829c5fc9353f15ce80bd094e648f" + }, + { + "dataPath": "params_shard_312.bin", + "format": "raw-shard", + "nbytes": 30495232, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.59.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.59.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.59.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.59.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.59.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.59.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + }, + { + "name": "model.layers.60.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + } + ], + "md5sum": "05df94df489b0a83d1e69c7b8ee561b0" + }, + { + "dataPath": "params_shard_313.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "a84d5f3c49af604b9e57fe0d6c7de64d" + }, + { + "dataPath": "params_shard_314.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.60.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "0c9d8f5dd85236c69e5e6053b924b61e" + }, + { + "dataPath": "params_shard_315.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.60.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "f11f534287ed027c624857b1781fdb1f" + }, + { + "dataPath": "params_shard_316.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.layers.61.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3200 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "cb88c2cdccd1a0723fe2def12cbfc450" + }, + { + "dataPath": "params_shard_317.bin", + "format": "raw-shard", + "nbytes": 30495232, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.60.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.60.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.60.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.60.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.60.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.60.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + }, + { + "name": "model.layers.61.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30484992 + } + ], + "md5sum": "6dd1bcd7d5f936e0e09a14f0c96bc3b5" + }, + { + "dataPath": "params_shard_318.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_weight", + "shape": [ + 51200, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "0a4682541642adf9a4e0b8465e9b6535" + }, + { + "dataPath": "params_shard_319.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.61.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "708abcdd6c283f84341bfa63d12fffd6" + }, + { + "dataPath": "params_shard_320.bin", + "format": "raw-shard", + "nbytes": 20971520, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 0 + } + ], + "md5sum": "9c53c4b87800305022ac411211c7b8f8" + }, + { + "dataPath": "params_shard_321.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.62.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "bcd380db479d4689df0c5e59b9cc7b0f" + }, + { + "dataPath": "params_shard_322.bin", + "format": "raw-shard", + "nbytes": 30485248, + "records": [ + { + "name": "model.layers.61.mlp.down_proj.q_scale", + "shape": [ + 5120, + 800 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.61.mlp.gate_up_proj.q_scale", + "shape": [ + 51200, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384000, + "byteOffset": 8192000 + }, + { + "name": "model.layers.61.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24576000 + }, + { + "name": "model.layers.61.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24586240 + }, + { + "name": "model.layers.61.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24586496 + }, + { + "name": "model.layers.61.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 27863296 + }, + { + "name": "model.layers.61.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484736 + }, + { + "name": "model.layers.62.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30484992 + } + ], + "md5sum": "3b0e108d9073d323395bdb615498d40c" + }, + { + "dataPath": "params_shard_323.bin", + "format": "raw-shard", + "nbytes": 26870016, + "records": [ + { + "name": "model.layers.62.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.layers.62.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20971520, + "byteOffset": 3276800 + }, + { + "name": "model.layers.62.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 24248320 + }, + { + "name": "model.layers.62.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26869760 + } + ], + "md5sum": "989aace9350195edefcd43414666b4d7" + } + ] +} \ No newline at end of file