{ "metadata": { "ParamSize": 709, "ParamBytes": 16895535104.0, "BitsPerParam": 3.0067237203635373 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 389283840, "records": [ { "name": "lm_head.q_weight", "shape": [ 640, 152064 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 389283840, "byteOffset": 0 } ], "md5sum": "cacf1bff98d9edd9b33596360ccad5b1" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "2d9df7baa76c00c092da812ffc5fe07c" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "192ba3484e9ad669dabe8e972dc5d640" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 389283840, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 152064, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 389283840, "byteOffset": 0 } ], "md5sum": "9bbcf21358f633fa07f5bd406ccf8bb7" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "aa16db3b41b325c8fa7d519bf8f4a418" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "7bf65766b3f35782ac917310c2018609" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 33218560, "records": [ { "name": "lm_head.q_scale", "shape": [ 40, 152064 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12165120, "byteOffset": 0 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 12165120 }, { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 12175360 }, { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14387200 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 18810880 }, { "name": "model.norm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 18821120 }, { "name": "model.embed_tokens.q_scale", "shape": [ 152064, 40 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12165120, "byteOffset": 18831360 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 30996480 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 31006720 } ], "md5sum": "873327fab7f5912b9ab1fa99c65fafe5" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 23371776, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4423680 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 4433920 }, { "name": "model.layers.0.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 4448256 }, { "name": "model.layers.0.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 22798336 } ], "md5sum": "1f3a5641e4287d4cba78010d001b6283" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "99cafc69918c9c82e060e12eb00db992" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "e2bb7494a17d164474f668eae84f1c05" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.1.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "f8e5b6c1e5830052d8917b561fc9f7c8" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.1.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "a80ffbe483ebe056da71407066dfeb51" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8824883022a4904c430a3be32a56e7f1" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "dc986afee3548c615bf4690761bd29a7" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.2.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "20092a0e7af9f2f992134e38854881f0" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.2.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "8861925bddba99e19231cc2a5526fd53" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "bd10d288f8cfb36424c18313d203c4cb" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.3.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "7c3281f1d7e26d7a431612f704b8260c" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "bb6e67963ab996b8c9c5fb6d0d5af376" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 32055296, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13516800 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17940480 }, { "name": "model.layers.3.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17954816 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18528256 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31635456 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32045056 } ], "md5sum": "88cc3f88ab412e8a0d5843be81076de9" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "6b73a291b7b8b397f44a6bd8816f4ba2" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 25583616, "records": [ { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 2211840 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 6635520 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 6645760 }, { "name": "model.layers.10.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 6660096 }, { "name": "model.layers.10.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 25010176 } ], "md5sum": "775a226fafeedd3c6c6bb7eebedc615c" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ce9f4633447ad8040ead2a75b3b9b630" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "5da9e7d4cb6153ee29f883e516237302" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.11.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "c2008827c082a57f5acf362cc8ac14a6" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.11.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "6be295cfe8682c7489b4300df7790021" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c29a6a5f8c1afc8c27311ed42673c0b3" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "2ef634a999de5d416960cb7c2b954de9" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.12.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "17d4011a69d0077e05ea735494018ce4" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.12.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "4955e7dfb029ae148232f8788a633a4f" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "3e67c0158bd4913a9930e99cdd036070" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.13.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "c403dd8c0483aedc1192a0f53b1a6319" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "2907388c04fd9b8243dbcbb84031f233" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 32055296, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13516800 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17940480 }, { "name": "model.layers.13.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17954816 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18528256 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31635456 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32045056 } ], "md5sum": "aa0d36ee125b0ce89d14e40b4f876dda" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "c3a40b32d27781a1574ce0aaf8cd92d9" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "1a35cf37fe4d38b4df75ddfd9fa45361" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "bb6248c7e4f699a1deeb22a37b6e8296" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 32239616, "records": [ { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 2211840 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 6635520 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 6645760 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 6656000 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 8867840 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13291520 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 13301760 }, { "name": "model.layers.9.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 13316096 }, { "name": "model.layers.9.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 31666176 } ], "md5sum": "42f968f49879ce258e0e36aa8f0733cb" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e04da3b3d2273efec7736f960fedb971" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "dae83c09228d4e0e459c16d35ff1173b" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "186b6630ac22c616e833776c59ab38c4" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.14.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "70d73b31a4dcae19cb8f008c971477f5" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 22992896, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 15738880 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 15749120 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 15759360 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 17971200 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22394880 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 22405120 }, { "name": "model.layers.14.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 22419456 } ], "md5sum": "e6e5ad9abd8716dccff9eb9f78dd5e05" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "27ea21c7f13f9c04bcd046d2066e0736" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "e1cf5c311a2739f2eed7c8c0633b568c" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.15.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "22e1b05118f30cc7551aef83427a08e1" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.15.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "b725c4215b84fada4f372a5957ae8c8b" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "9cb5e49af1aa1d9dc5d7473b9f2f47ae" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "905387f0793474555488a8c40186bdaf" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.16.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "f16bd771dba69e262ecf768404c38c31" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.16.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "384a37c0663c8c1f901951268bbaf9de" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "67f14217b7bf812c443185dcc5e35658" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "3b17935e7dabcc6673e843b58912e818" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.17.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "a7924e41e7808afee60e58c59f5f1467" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.17.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "093f258dab44112bfbe30a8fe5265eee" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "9710bdd6005bee4102dc7aa01b0b9eae" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.18.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "ced1c05c3964293ab5644f8ea0d5f774" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "076812261eb579a3b0fad1f56828d476" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 32055296, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13516800 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17940480 }, { "name": "model.layers.18.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17954816 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18528256 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31635456 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32045056 } ], "md5sum": "5a901c391c6eaf8aee1122b402340e22" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "dd23934e2092dd0a765f4d032b759f3c" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "80607b8d48957a7163f4c448387a5027" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 27815936, "records": [ { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2222080 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 2232320 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4444160 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8867840 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8878080 }, { "name": "model.layers.19.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8892416 }, { "name": "model.layers.19.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 27242496 } ], "md5sum": "c190d840eb8cfeb9e9be72c5cc12d74f" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "20d2e27b07306c7e779157ea8808d176" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "9528a81d63aa2d55d63b4674f1489ef0" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.20.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b255d4f4ac3efd46d406073490c8f8c6" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.20.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "399b9ed21668b482c6833facb17435a2" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "aead98d0e7c1419a90d41876da5777a3" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "d09853daa03571107a0529bfa807bc45" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.21.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "419e0991f9f4fac105351b3d37e694bf" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.21.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "f00b2c7a74dcf9284e91630f372a0df8" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "70384c6df33eef0fd2573dc9f10c58f7" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "377f2b89438ae3a0cced7ca184096156" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.22.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "39658fbad414f3685d0ab1a48513f9be" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.22.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "d68068eaf982a3775c8fd2ac5c0d0a6e" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "1a9e19ab83612a6295e8975463d75a1c" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.23.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "3bcf9951f316404bfff0bda5c10cb13d" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "5c82ec65b695bdddda51258609b3ccc0" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 32055296, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13516800 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17940480 }, { "name": "model.layers.23.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17954816 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18528256 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31635456 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32045056 } ], "md5sum": "a9a0f3ba7673315ecbd46a1a21315421" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a2b97dde9f893581ccbf4d73fa9b9591" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "2f276f8ef078c4f543ad201a976e2bc6" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 27815936, "records": [ { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2222080 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 2232320 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4444160 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8867840 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8878080 }, { "name": "model.layers.24.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8892416 }, { "name": "model.layers.24.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 27242496 } ], "md5sum": "ba8eeddc78c4fb2372b407a3a57f75eb" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3c796ff151e23830d1ff326b761a5b01" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "7eef67abe9f0ad5de761a9170ca38eef" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.25.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "177eeecfa81b81f3229bfad271b39257" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.25.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "d818a126c38a113e71ee1d7954f882e0" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d8a05aab05170d2cf269bbc670f02890" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "35baa7f5b88040063099c6a82d998b92" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.26.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "1a32a5654ca286acbdcab76bf2fee3ac" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.26.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "019a58b58d0c6e3ee93df51de89362f1" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "36261004499c359457560cddb0fa66b5" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "e15e0a8014e33531ec533070092a7cf8" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.27.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "dbb8476bcf21e941b609aba4e301c780" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.27.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "f428a093c0e79a4533b26fd3ee8804f6" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "ee86319b0568953d8965057869aa87b8" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.28.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "fa89a5f021c29c83bec5251afe8902ed" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "37bbde1dcee406d1a89118e899263fa8" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 32055296, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13516800 }, { "name": "model.layers.28.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17940480 }, { "name": "model.layers.28.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17954816 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18528256 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31635456 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32045056 } ], "md5sum": "cdef834f84bd85bc31496af59e9c2ad0" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "eb67ebcbcf5d1b2c54ab7c7b3b86fc68" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "1a64e53225733648302bd98380c10af6" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 27815936, "records": [ { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2222080 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 2232320 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4444160 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8867840 }, { "name": "model.layers.29.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8878080 }, { "name": "model.layers.29.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8892416 }, { "name": "model.layers.29.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 27242496 } ], "md5sum": "8168a35651b1d84ea26f9a18c3958334" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d3a8cc99741404d99662c67e8f3e36f1" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "d926ef0513b0c26342a654defc2b343c" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.30.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "4a31b41e578cb2f6b32406c397ae50ad" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.30.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.30.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "be01afa385f4558b33d81fd34d355895" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "37a612ee8666f9edfef8c87805e8fb5b" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "064d0b115965fc7179da8875400e1a08" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.31.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "54e72f3bd771576220a90843fb6c841c" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.31.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.31.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "faaad705ee397ef35f5ea2a9cc276545" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "dda67fee634aa7b33e3a2f68c37d39bc" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "f3bd9f7ceb0c71f72cfbdfd1f7255d31" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.32.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "df1f63ed9eb2e09c517612208e24cc0a" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.32.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.32.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "4b4aaeb8d198b6c5b9286365004f9dbf" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "6d8e4784de374449c1b6b6a61d758792" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.33.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "781a6ff0cf48c17bd393304b3b4ccc43" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f8fdb98bcc3ac6b8266f302c979330bc" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 32055296, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13516800 }, { "name": "model.layers.33.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17940480 }, { "name": "model.layers.33.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17954816 }, { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18528256 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31635456 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32045056 } ], "md5sum": "0223b99a6e1f8131af23514972c242d9" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "94e657e7cd51219681646b877342ee91" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "7195ecf8240a9fdee21aeb57ad5f0a71" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 27815936, "records": [ { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2222080 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 2232320 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4444160 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8867840 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8878080 }, { "name": "model.layers.4.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8892416 }, { "name": "model.layers.4.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 27242496 } ], "md5sum": "d57900deb47c8e22976ad6b24e9d8a26" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "5acfb032c9154da3291f5e2219df7e9c" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "f46c5a3c36637518266ae6cd9f6b4b0f" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.5.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "105cae8ae986b849b3cebf5beddaf4b8" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.5.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "ea252b4969cd0fe9e9d5658f18c63eee" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "771565421a5eaed23abdadcaaa2f7457" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "dd623779fb4b8d2b4456680625f6484a" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.6.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "7a0e1ff5abd73eece00e398f41ff5c0b" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.6.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "5dd1c15c60c9a156af9656f5cdcaf53a" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "7726066c064a12cfaefc7b08a5c6b92a" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "d8dd3c5e4dff485ca0b0d3870ef0d8c8" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.7.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "00172485f96e9dd53255b2412c3f0b02" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.7.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "9ebb99c56696b11bd4415ade3db6b016" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 13516800 }, { "name": "model.layers.8.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 13531136 }, { "name": "model.layers.8.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 31881216 } ], "md5sum": "0ea9f3edd7b0e21c523b0666d7ca030b" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "795ed73d0fdef4286e27bf6aa13ad052" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "95b1ccb3050b5e3f27dc00ee98a733f0" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "0854ca94ae8049b82901c93b00edf359" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.34.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "aed126a0b0123abb6f29494e616a2c79" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 22992896, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 15738880 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 15749120 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 15759360 }, { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 17971200 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22394880 }, { "name": "model.layers.34.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 22405120 }, { "name": "model.layers.34.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 22419456 } ], "md5sum": "6701e39a5ad65d7f1cef1a50d3a8e530" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a390a6a757c3479187b636d6bcf6df14" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "79818dd43cd4901b1aa9190971e0bee1" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.35.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "d86161536fee956d3fc6512f786551ef" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.35.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.35.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "732543c515b1c0ea381c8e7313241d60" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "9d09eb1e402bebe8822713a6158bcc04" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "1dafa74cad0777522482ffe284213266" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.36.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "5469683cde0588f8d4144f0a3771e181" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.36.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.36.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "4115c910e3637748c5dc09a4671853bd" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "55bb9b7ead64a1519c6f425fb569fe1a" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "eec4d8d4d551bec1896cb540cdc49dbe" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.37.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "c830a1fcdd9e036018705ac45a514947" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.37.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.37.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "67ba6adabba1437f188eafcfa4ea36b0" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "4437474746d56bc6bba7b1a4d4e13e4a" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.38.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "2acde3c8a74d4dcd187bc1aeff732b56" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3b7881272a7f238754cece65ec187f00" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 32055296, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13516800 }, { "name": "model.layers.38.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17940480 }, { "name": "model.layers.38.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17954816 }, { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18528256 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31635456 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32045056 } ], "md5sum": "9ad0adb777bd3f5282b62285bbaf2382" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b891c08c2ad8891cdf015b521a28ec7b" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "87805a38e0402960b305b61c5e92000c" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 27815936, "records": [ { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2222080 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 2232320 }, { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4444160 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8867840 }, { "name": "model.layers.39.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8878080 }, { "name": "model.layers.39.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8892416 }, { "name": "model.layers.39.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 27242496 } ], "md5sum": "543b1717cede80c47cda6b10a7900893" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3709941329bab398b06ae5fcb7b9b8de" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "b5910dc3a287826b43a4dbb49e2a2ba9" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.40.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "be40a3185f4c7558c09c6f2ec2200d1f" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.40.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.40.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "b4cdf949151e8a94fbdddc88b1c23d71" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "81690199066b11e9e9e0c82b8380b243" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "d675c215bcf77624de83e87874a84692" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.41.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "bd998a8e6f7f46ceac2355b22bb1d0f5" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.41.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.41.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "cc44c4a0ab17f1c8ee82cfaff3ad8cc9" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ae5ab694a50233e7220c7b1cf9d2b740" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "4657b50fe77b2b9fd3f44ff529b07e0a" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.42.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "26cdeda16eeead482af32c63c8d02a9f" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.42.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.42.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "e878c52f56f8b630a0a035889c950ba7" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "d1d664aaa338d20f0950f601a21115e0" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.43.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "1db023ed76054e835959cbb438308188" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "dd2ba66b0881c42cc1da683c2fa5c8f7" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 32055296, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13516800 }, { "name": "model.layers.43.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17940480 }, { "name": "model.layers.43.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17954816 }, { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18528256 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31635456 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32045056 } ], "md5sum": "b8f63f2cd6a4d49557a223485bc7b2df" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d8eda2ad7ff10e70fde8d2938c641e00" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "71ff65474ee9ebe4a5c5171098332234" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 27815936, "records": [ { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2222080 }, { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 2232320 }, { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4444160 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8867840 }, { "name": "model.layers.44.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8878080 }, { "name": "model.layers.44.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8892416 }, { "name": "model.layers.44.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 27242496 } ], "md5sum": "de4e237ee19e5390741fb6a1e591fb3e" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ea3d989885d809a29d45c61a4766f4f1" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "9d5e6c983a924949357a4cfcf982b6c9" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.45.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "38f5a81a6d61f195ea2e51a7447dd2a6" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.45.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.45.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "857c027429676fcfed1f7a4d06d26e37" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d238bc493b568c0a96489ec1095db7aa" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "c9642fa22810375916896936b50d86e2" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.46.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "a8e6f74dd0a1cc27274a79b2bd351f3b" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.46.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.46.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "3ba9c808d227a7586887141b8523aef5" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f88942ffe9c2f3a4fd23654d74c52824" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "8939373f3435e1746113db9357376ee7" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.47.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "5fa952799470347f5aa5508fb140bf11" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.47.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.47.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "5af2f788dd766f0ef2f5f10ce4b3bbf1" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "89d5bf73182928a76cf5fa550d1ce815" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.48.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "f8e7de3b15f5d172ab217f50509063f8" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "79d1d1b18d97c78af81558b82f1ed737" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 32055296, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13516800 }, { "name": "model.layers.48.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17940480 }, { "name": "model.layers.48.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17954816 }, { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18528256 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31635456 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32045056 } ], "md5sum": "2e21c76c5402cbb2dc7d7a51312ccd8d" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "04d30316ce788e36d008a81478076335" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "b9d7460f35440fed2a78e53a7ed803b6" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 27815936, "records": [ { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2222080 }, { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 2232320 }, { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4444160 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8867840 }, { "name": "model.layers.49.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8878080 }, { "name": "model.layers.49.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8892416 }, { "name": "model.layers.49.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 27242496 } ], "md5sum": "1ca977359b02ff57497b872545b98a8a" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "41afcf3478eabb4f37a23933194cba74" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "e49450e423b984ada42c343a8ccd5dfa" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.50.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "efc4479f75202902da7d759a06ef39a1" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.50.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.50.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "43b0dd92f7dc9b0b569cba2908c114ee" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3f43a3335a78e73364fe7b364b8517f4" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "8e234bdf005508659bf099db9fd6abd1" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.51.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "f05c51af50e806ed0e8996f159e98bb9" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.51.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.51.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "541709ed1cf5851164daaabc6706cf48" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "6b9363cf54842881dc778025becf3545" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "99444ff8bb34c169cea81d62f399831a" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.52.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "89ce449f1478933f2127b95f2b9dd6e3" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.52.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.52.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "6524b7ca32451e936a1cb1510195901d" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "5b0f530a75a2da9d2e8f5b851aa630e5" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.53.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "8206a0512493a563fd5981fc95b43f4f" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b236879d28b4114cb414ddde1c5685e0" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 32055296, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13516800 }, { "name": "model.layers.53.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17940480 }, { "name": "model.layers.53.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17954816 }, { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18528256 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31635456 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32045056 } ], "md5sum": "08ac3fcd81e309ef4733148be8a03e72" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "71d1066651600f77d1cb9f790456f5ce" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "719a57400b5e416434fb61568aaeb2cd" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 27815936, "records": [ { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2222080 }, { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 2232320 }, { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4444160 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8867840 }, { "name": "model.layers.54.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8878080 }, { "name": "model.layers.54.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8892416 }, { "name": "model.layers.54.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 27242496 } ], "md5sum": "41a007f1e5ce71491f6191ebea2a5d04" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "5b9052c5ee9b0d81ca1acb5a44746661" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "fa2692be3c074157881f2b93f5336315" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.55.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "2f8a73df6d844bc44526172dbc769f39" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.55.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.55.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "b22893e8de037aac049b05fe93d6782e" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "fb805b62ff91c4fb96ab13b654f60d15" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "c3df377bd54d107323bcc6f2bec770a2" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.56.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "5f01d2ea8f087aa5997bfa6ba0b22db6" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.56.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.56.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "5b9e5299780b8ae1d5f4f85995646ee0" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "caec105b9d6009fe2fed201e139aff81" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "2103f2ee7a88a36bbc190561c8779eaf" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.57.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "67809ec4be23aec432274779332294c0" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.57.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.57.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "9f0bb73b96eb1c3481fbdb3ec5b5ca22" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "5e98572ea94d1bb09f977d297099bcaa" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.58.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b12f7d4ca29a98d9468aafdd2ed120a8" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "cc4c8814fdd3f6c2315c2e73a4a96e35" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 32055296, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13516800 }, { "name": "model.layers.58.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17940480 }, { "name": "model.layers.58.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17954816 }, { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18528256 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31635456 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32045056 } ], "md5sum": "7921143b6569856652c56c63a2dfe46b" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a7115855108ca581a258ed71debcf1e4" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "81e75a1f3818d9546faa290a7a6f7a24" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 27815936, "records": [ { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2222080 }, { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 2232320 }, { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4444160 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8867840 }, { "name": "model.layers.59.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8878080 }, { "name": "model.layers.59.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8892416 }, { "name": "model.layers.59.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 27242496 } ], "md5sum": "9c6fcba272f7e76680c1cf143e2ba674" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0ad8869d6e55ab116359aea3fe2434e5" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "fa927fa15591d3878f5f449bac059eb4" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.60.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "8452ffbbf8b50bc3d2acfa5e1f83ae01" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.60.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.60.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "ef963790d742d6f8c956bfc17fbedec7" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "6a22e9d9b8ab192507f994c21bc56fb9" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "1204b8f23984c0a825978d5461b191a1" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.61.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "1929f3c5fcf5a1eed7981103fd4f5f75" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.61.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.61.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "4a16a9f5b5932750a414c412e05560db" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "50258ff53350959cbed872c2e16332cc" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "8d7c6a1a06ba24e81ed85da4861a4ed3" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.62.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "a78f2503799db9d747aa506232ee6bad" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.62.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.62.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "79faaef77061d9625c06a5c905a2ede7" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.63.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 13516800 }, { "name": "model.layers.63.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 13531136 }, { "name": "model.layers.63.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 31881216 } ], "md5sum": "041de6bd945444b3640df632358a5f93" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 13516800, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 } ], "md5sum": "a6e9310a9f37cd4cd2af5482b58fd9c5" } ] }