{ "metadata": { "ParamSize": 1119, "ParamBytes": 16885412864.0, "BitsPerParam": 4.924223553723174 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 704815104, "records": [ { "name": "language_model.model.embed_tokens.q_weight", "shape": [ 262208, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 704815104, "byteOffset": 0 } ], "md5sum": "c7be8b27d7f8121541d55a7f4fa47891" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 88101888, "records": [ { "name": "language_model.model.embed_tokens.q_scale", "shape": [ 262208, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 88101888, "byteOffset": 0 } ], "md5sum": "c10f794772334957d727081ee5c0d299" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.0.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "328d8600318a5882ba7456a7bf75156d" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "7fb6f0a3e8953616ca2ecf2d272116f0" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 27912448, "records": [ { "name": "language_model.model.layers.0.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 0 }, { "name": "language_model.model.layers.0.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 10752 }, { "name": "language_model.model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 7236096 }, { "name": "language_model.model.layers.0.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 21686784 }, { "name": "language_model.model.layers.0.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 21697536 }, { "name": "language_model.model.layers.0.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 21708288 }, { "name": "language_model.model.layers.0.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21719040 }, { "name": "language_model.model.layers.0.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 21719296 }, { "name": "language_model.model.layers.0.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 27224320 } ], "md5sum": "1a377ebbbb20376d216a2d12958b9c57" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "e7aa76c98ce00001e4ab0c6dedb5319b" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 30966016, "records": [ { "name": "language_model.model.layers.0.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.0.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.0.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12386304 }, { "name": "language_model.model.layers.0.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 12386560 }, { "name": "language_model.model.layers.0.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 23396608 }, { "name": "language_model.model.layers.0.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 24772864 }, { "name": "language_model.model.layers.0.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 30277888 } ], "md5sum": "b98f6205ef9fd72b2196c2134958ce25" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 33030656, "records": [ { "name": "language_model.model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.1.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14450688 }, { "name": "language_model.model.layers.1.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14450944 }, { "name": "language_model.model.layers.1.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19955968 }, { "name": "language_model.model.layers.1.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20644096 }, { "name": "language_model.model.layers.1.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31654144 }, { "name": "language_model.model.layers.1.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33030400 } ], "md5sum": "6419d444b06a8eb32b777d4485fc11cd" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.1.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "836adbdd745bb560644d23af87d85223" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.2.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "ce9e9ddc9f39832c9ef9848b9bd1cc3d" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "204a70abb3bb4ef9eed338d76b725142" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 33083904, "records": [ { "name": "language_model.model.layers.1.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.1.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.1.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.1.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.1.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.1.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 }, { "name": "language_model.model.layers.1.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 25815552 }, { "name": "language_model.model.layers.1.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 25826304 }, { "name": "language_model.model.layers.1.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 25837056 }, { "name": "language_model.model.layers.2.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 25847808 }, { "name": "language_model.model.layers.2.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 25858560 } ], "md5sum": "1ea9f0b1424df087dabb394d3ce2a19f" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.2.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.2.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.2.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.2.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.2.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.2.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.2.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.2.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.2.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "83cc9d436c79b70861487ba6bcb74cda" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.3.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "f4f659d006230e5426652cb9874cfae5" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "1c1edf6692bc06aabab3970fae158a1a" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.2.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.2.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.2.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.2.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.3.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.3.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "d6b030f9d84e67abf48d77703c8b0965" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.3.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.3.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.3.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.3.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.3.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.3.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.3.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.3.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.3.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "2c148423b283ee6fc21643f33e8aa7bf" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.4.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "d5256c9a674aca5342ea979c70ba95d1" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "49adb408048b41cfac3e09496ce3beb7" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.3.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.3.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.3.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.3.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.4.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.4.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "e457a9ce042b6f2b8e1fbbf2fbb78b67" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.4.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.4.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.4.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.4.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.4.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.4.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.4.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.4.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.4.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "8b3b8747d993dbec71808e8fb0a45acb" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.5.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "80dada3dc22a37e6cf2a657bbba2aa6e" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "6e697a59868c469e0e229fcee0078937" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.4.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.4.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.4.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.4.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.5.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.5.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "f6c242109caaf2d556f4a9fb52715725" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.5.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.5.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.5.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.5.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.5.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.5.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.5.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.5.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.5.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "652d63c69c935b546ee20dc827570dd2" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.6.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "571b3d5140215b8964278f00446d904e" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "f2b0f93ca180e850e8a8382e1f18af46" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.5.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.5.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.5.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.5.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.6.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.6.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "585c87fa21863424d57e42f0718a3033" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.6.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.6.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.6.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.6.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.6.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.6.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.6.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.6.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.6.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "d657e09432de39907ae200a67f36bb71" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "a368b70045dfebb2e880b67246563b13" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 33030400, "records": [ { "name": "language_model.model.layers.6.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.6.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.6.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.6.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 18579456 }, { "name": "language_model.model.layers.7.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33030144 } ], "md5sum": "266b9b1b6f9328457be6adf5da7cdc53" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 30966016, "records": [ { "name": "language_model.model.layers.7.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 0 }, { "name": "language_model.model.layers.7.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 5505024 }, { "name": "language_model.model.layers.7.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 6193152 }, { "name": "language_model.model.layers.7.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 17203200 }, { "name": "language_model.model.layers.7.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 18579456 }, { "name": "language_model.model.layers.7.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 18579712 }, { "name": "language_model.model.layers.7.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 29589760 } ], "md5sum": "b058823f2dd634ccb48062cefd4ecaf5" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.10.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "cf0f4d5eeb2043a51a8f8a92600316d5" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "7f4a4724c605487003d4b8d856dd82af" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 33417472, "records": [ { "name": "language_model.model.layers.7.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 0 }, { "name": "language_model.model.layers.7.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 5505024 }, { "name": "language_model.model.layers.10.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 6193152 }, { "name": "language_model.model.layers.10.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 6203904 }, { "name": "language_model.model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 13429248 }, { "name": "language_model.model.layers.10.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 27879936 }, { "name": "language_model.model.layers.10.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 27890688 }, { "name": "language_model.model.layers.10.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 27901440 }, { "name": "language_model.model.layers.10.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27912192 }, { "name": "language_model.model.layers.10.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 27912448 } ], "md5sum": "b725611f8e7f5244daef86a1f2154f68" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.11.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "b1d2e66b78ce0ca85948781bfa2fb01c" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 31664896, "records": [ { "name": "language_model.model.layers.10.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 0 }, { "name": "language_model.model.layers.10.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 688128 }, { "name": "language_model.model.layers.10.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11698176 }, { "name": "language_model.model.layers.10.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 13074432 }, { "name": "language_model.model.layers.10.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 13074688 }, { "name": "language_model.model.layers.10.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 24084736 }, { "name": "language_model.model.layers.10.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 25460992 }, { "name": "language_model.model.layers.10.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 30966016 }, { "name": "language_model.model.layers.11.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 31654144 } ], "md5sum": "83c3f15f499a89f57b004e20a4265c04" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "21a909c9fd48ab71f45adb52c24b9da8" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 27901696, "records": [ { "name": "language_model.model.layers.11.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 0 }, { "name": "language_model.model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 7225344 }, { "name": "language_model.model.layers.11.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 21676032 }, { "name": "language_model.model.layers.11.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 21686784 }, { "name": "language_model.model.layers.11.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 21697536 }, { "name": "language_model.model.layers.11.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21708288 }, { "name": "language_model.model.layers.11.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 21708544 }, { "name": "language_model.model.layers.11.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 27213568 } ], "md5sum": "0de860ca65f7e6f231964b12956b469d" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.12.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "788f4acd5cec34f7f90a912db6d7cd3e" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 30976768, "records": [ { "name": "language_model.model.layers.11.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.11.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.11.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12386304 }, { "name": "language_model.model.layers.11.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 12386560 }, { "name": "language_model.model.layers.11.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 23396608 }, { "name": "language_model.model.layers.11.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 24772864 }, { "name": "language_model.model.layers.11.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 30277888 }, { "name": "language_model.model.layers.12.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 30966016 } ], "md5sum": "37c40ed3dd71958b3c56a3d023752cee" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "a0a032dee1d8ad73eafaba0f67d8c7fe" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 27901696, "records": [ { "name": "language_model.model.layers.12.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 0 }, { "name": "language_model.model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 7225344 }, { "name": "language_model.model.layers.12.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 21676032 }, { "name": "language_model.model.layers.12.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 21686784 }, { "name": "language_model.model.layers.12.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 21697536 }, { "name": "language_model.model.layers.12.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21708288 }, { "name": "language_model.model.layers.12.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 21708544 }, { "name": "language_model.model.layers.12.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 27213568 } ], "md5sum": "8bc51c5e93af8d33b88f37716babfa81" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "25c2612d06acbfada1bf01cbef1ba85a" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 30966016, "records": [ { "name": "language_model.model.layers.12.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.12.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.12.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12386304 }, { "name": "language_model.model.layers.12.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 12386560 }, { "name": "language_model.model.layers.12.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 23396608 }, { "name": "language_model.model.layers.12.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 24772864 }, { "name": "language_model.model.layers.12.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 30277888 } ], "md5sum": "680797936e7bdf603df9fb708618c5c2" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 33030656, "records": [ { "name": "language_model.model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.13.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14450688 }, { "name": "language_model.model.layers.13.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14450944 }, { "name": "language_model.model.layers.13.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19955968 }, { "name": "language_model.model.layers.13.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20644096 }, { "name": "language_model.model.layers.13.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31654144 }, { "name": "language_model.model.layers.13.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33030400 } ], "md5sum": "4bc70ce6968e1ca067338b6b5f685846" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.7.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "d68c1ae546bf4b0bee5eac4aef6bd494" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.8.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "00532556e6b8b5a7a432448cd5f23d25" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "301d6b7d43701f975b9944f3fe80f37a" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 33083904, "records": [ { "name": "language_model.model.layers.13.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.13.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.13.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.13.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.7.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.7.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 }, { "name": "language_model.model.layers.7.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 25815552 }, { "name": "language_model.model.layers.7.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 25826304 }, { "name": "language_model.model.layers.7.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 25837056 }, { "name": "language_model.model.layers.8.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 25847808 }, { "name": "language_model.model.layers.8.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 25858560 } ], "md5sum": "c157207b53ff8207e10a6542ca051424" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.8.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.8.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.8.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.8.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.8.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.8.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.8.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.8.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.8.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "ba1e15bc6da0303f96e3a28c6e82848c" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.9.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "600775a73089671a9d6da36a977f1cb5" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "de7402c1b7eee2709267b42b99c3efb9" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.8.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.8.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.8.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.8.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.9.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.9.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "f349890eb1e2b0e6673af4d4519ed23a" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.9.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.9.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.9.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.9.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.9.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.9.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.9.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.9.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.9.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "d416d4b5370557a04fbd3d8cefaa664b" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.13.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "adaec0a3dd1ed2bbaba4458cf6d20a97" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.14.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "6471a3ec0da087523c44353d63bdd7c6" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "525aa7452418210ecefe3a9991bafe85" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 33083904, "records": [ { "name": "language_model.model.layers.9.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.9.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.9.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.9.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.13.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.13.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 }, { "name": "language_model.model.layers.13.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 25815552 }, { "name": "language_model.model.layers.13.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 25826304 }, { "name": "language_model.model.layers.13.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 25837056 }, { "name": "language_model.model.layers.14.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 25847808 }, { "name": "language_model.model.layers.14.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 25858560 } ], "md5sum": "911bb7eee06bf7d5f1facc9f1ccc8326" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.14.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.14.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.14.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.14.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.14.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.14.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.14.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.14.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.14.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "9b5ca3b36664736e4f4548b972f773e5" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.15.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "56336311b8330f897c0f03273403c99f" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "d663218bc492afdd15abc23cec9b8f77" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.14.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.14.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.14.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.14.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.15.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.15.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "4d0ea7997e2db5dfc3df780701e33e07" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.15.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.15.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.15.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.15.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.15.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.15.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.15.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.15.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.15.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "870b242b7e6f3acef373008e12a63510" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.16.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "b0fc5488adf273c29e769d9e8052b5bf" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "320d48c567f20661af205a7f775c1034" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.15.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.15.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.15.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.15.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.16.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.16.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "fe0b4a31cd4eea02404c1ab4d591e27a" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.16.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.16.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.16.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.16.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.16.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.16.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.16.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.16.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.16.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "6f7b2ef332c17581b606c70c7bea4cda" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.17.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "a6596da0a6c7ab138f2301d988b0def6" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "9763ddc13d4657aa9e3a0641feb57441" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.16.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.16.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.16.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.16.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.17.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.17.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "a4964fb28bc95f796ec8345ca0bb5b72" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.17.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.17.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.17.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.17.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.17.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.17.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.17.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.17.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.17.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "e635298129748eb4f1399ccf4c522d54" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.18.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "c53a6f854f0c5642a4734a6d9372c75f" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "a5cfd8434f7072877240f9666c758e2a" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.17.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.17.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.17.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.17.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.18.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.18.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "842c4dc7b527a276c3d82a70d3a09536" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.18.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.18.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.18.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.18.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.18.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.18.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.18.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.18.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.18.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "ef34fc2ff6c9f709506c843c3534357d" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "7ce54e510463c60d0d01bdf81973c5b8" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 33030400, "records": [ { "name": "language_model.model.layers.18.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.18.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.18.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.18.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 18579456 }, { "name": "language_model.model.layers.19.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33030144 } ], "md5sum": "24e6ec269d09b321ece906ea3c0be399" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 30966016, "records": [ { "name": "language_model.model.layers.19.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 0 }, { "name": "language_model.model.layers.19.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 5505024 }, { "name": "language_model.model.layers.19.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 6193152 }, { "name": "language_model.model.layers.19.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 17203200 }, { "name": "language_model.model.layers.19.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 18579456 }, { "name": "language_model.model.layers.19.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 18579712 }, { "name": "language_model.model.layers.19.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 29589760 } ], "md5sum": "134a33a122571df7371660109eb7336a" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.19.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "f6d0dca60ff47e2455baa549d88d5b5d" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.20.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "83f8142f3784d04dd9a4fe342347a7fe" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "e43270e6c8b36cb5708b5710a1b18a74" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 20697600, "records": [ { "name": "language_model.model.layers.19.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 0 }, { "name": "language_model.model.layers.19.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 5505024 }, { "name": "language_model.model.layers.19.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 6193152 }, { "name": "language_model.model.layers.19.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 6203904 }, { "name": "language_model.model.layers.19.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13429248 }, { "name": "language_model.model.layers.19.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13440000 }, { "name": "language_model.model.layers.19.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13450752 }, { "name": "language_model.model.layers.20.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13461504 }, { "name": "language_model.model.layers.20.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 13472256 } ], "md5sum": "c80516bafa158db3a8e0a5c11e56bf10" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.20.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.20.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.20.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.20.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.20.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.20.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.20.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.20.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.20.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "851ce575c6cbfba82230c9837531889d" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.21.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "59df9f4d8fb3cafc91951d683972f34f" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "8155496875270de52583a2d351bc2f12" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.20.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.20.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.20.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.20.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.21.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.21.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "ca183d85e60e8c13810faed0414fe9e7" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.21.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.21.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.21.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.21.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.21.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.21.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.21.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.21.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.21.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "b402b99b74fd038350c7a61707218f08" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.22.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "9965b1664da7bbf59d4c595d64dea14a" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "325dd829db29ca9a1516c724a9088575" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.21.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.21.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.21.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.21.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.22.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.22.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "31d728ec6af2d4bf8f8082f74c47482a" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.22.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.22.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.22.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.22.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.22.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.22.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.22.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.22.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.22.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "fc5604c332a5aee3e6df15dbcae429de" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.23.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "6d3c17cf6d299f2b27f04fca475b7f00" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "ed0921253eafe1323333b5a73a759ee9" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.22.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.22.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.22.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.22.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.23.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.23.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "85ceff56cf6836428af68e2c91c9c061" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.23.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.23.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.23.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.23.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.23.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.23.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.23.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.23.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.23.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "ae2afe8f39c2135c2c3395c38a900417" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.24.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "8c3b628861db83feb39e91c34750cecb" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "bffe119b5b768eb16d80ea7b8026e6e5" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.23.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.23.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.23.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.23.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.24.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.24.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "8c7dda03270d9b9754f344f24d5eab80" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.24.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.24.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.24.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.24.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.24.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.24.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.24.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.24.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.24.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "4f6c502d6ddc6eb8e34445ba104839a1" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "98f6c7fccb0fbc9e85d04cf65aaa519e" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 33030400, "records": [ { "name": "language_model.model.layers.24.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.24.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.24.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.24.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 18579456 }, { "name": "language_model.model.layers.25.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33030144 } ], "md5sum": "c5fcfe7eb97ab6d5a13591d006755304" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 30966016, "records": [ { "name": "language_model.model.layers.25.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 0 }, { "name": "language_model.model.layers.25.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 5505024 }, { "name": "language_model.model.layers.25.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 6193152 }, { "name": "language_model.model.layers.25.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 17203200 }, { "name": "language_model.model.layers.25.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 18579456 }, { "name": "language_model.model.layers.25.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 18579712 }, { "name": "language_model.model.layers.25.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 29589760 } ], "md5sum": "24d54b504377b8bdb3d5e808ce5cbd95" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.25.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "5ea288c450654ff8a18a6f5db2bcb6de" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.26.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "3a71b72ac2805a6eaebe9a5ab4022fbd" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "a14bf4dc43d34c19e8256dd65159db94" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 20697600, "records": [ { "name": "language_model.model.layers.25.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 0 }, { "name": "language_model.model.layers.25.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 5505024 }, { "name": "language_model.model.layers.25.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 6193152 }, { "name": "language_model.model.layers.25.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 6203904 }, { "name": "language_model.model.layers.25.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13429248 }, { "name": "language_model.model.layers.25.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13440000 }, { "name": "language_model.model.layers.25.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13450752 }, { "name": "language_model.model.layers.26.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13461504 }, { "name": "language_model.model.layers.26.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 13472256 } ], "md5sum": "bc0057aec43844ca14adb06f6edc74c8" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.26.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.26.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.26.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.26.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.26.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.26.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.26.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.26.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.26.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "32c9f203a9a9094696cc82461d87fb01" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.27.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "d3ba4908e21ab080c9958c693978e430" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "0435f1fa286c42d62612721064a6d361" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.26.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.26.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.26.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.26.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.27.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.27.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "30cca0c772c56e48a330008a67ed4405" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.27.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.27.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.27.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.27.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.27.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.27.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.27.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.27.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.27.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "d2af03b9327da52dadb4c2bce79986a8" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.28.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "e85ca639693c2d5533a8db7823a090bc" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "1041056fce41b1e30991d7b4f0a35831" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.27.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.27.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.27.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.27.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.28.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.28.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "c594b28ba225ba0c21a7aee2d6a35639" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.28.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.28.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.28.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.28.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.28.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.28.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.28.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.28.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.28.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "8d31dfa29b98e3f425ac27351d6f0059" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.29.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "a83d3edcc7114c051aedf1a9b997d650" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "be6011e1e4492c848eb83c1adde9bf06" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.28.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.28.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.28.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.28.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.29.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.29.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "6aaa4f9ac5b8c03b3dd0fedfc29adc5e" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.29.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.29.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.29.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.29.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.29.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.29.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.29.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.29.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.29.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "39a8f36537567deebd3a58af343c5a27" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.30.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "c1dc493b59993f37678c643be6952fec" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "5e3d8d1852756c57b31a65d259c34693" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.29.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.29.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.29.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.29.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.30.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.30.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "30815a62b85416faf96e5685892c677d" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.30.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.30.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.30.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.30.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.30.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.30.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.30.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.30.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.30.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "23948a4d5f66d1d6da608d986f216108" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "92d2f9816e19910fdea2da4c6605875e" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 33030400, "records": [ { "name": "language_model.model.layers.30.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.30.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.30.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.30.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 18579456 }, { "name": "language_model.model.layers.31.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33030144 } ], "md5sum": "c39f8f561071e86bbe257dd49c2d2c20" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 30966016, "records": [ { "name": "language_model.model.layers.31.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 0 }, { "name": "language_model.model.layers.31.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 5505024 }, { "name": "language_model.model.layers.31.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 6193152 }, { "name": "language_model.model.layers.31.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 17203200 }, { "name": "language_model.model.layers.31.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 18579456 }, { "name": "language_model.model.layers.31.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 18579712 }, { "name": "language_model.model.layers.31.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 29589760 } ], "md5sum": "1957dda6255d969db1cad8637b16008e" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.31.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "1eaecef2655016fed9a2b8a782916f68" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.32.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "d9f33bbdd61d70a9531f5ab02d346007" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "949ba2bd55676382ebdf6d6683df2a88" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 20697600, "records": [ { "name": "language_model.model.layers.31.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 0 }, { "name": "language_model.model.layers.31.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 5505024 }, { "name": "language_model.model.layers.31.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 6193152 }, { "name": "language_model.model.layers.31.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 6203904 }, { "name": "language_model.model.layers.31.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13429248 }, { "name": "language_model.model.layers.31.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13440000 }, { "name": "language_model.model.layers.31.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13450752 }, { "name": "language_model.model.layers.32.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13461504 }, { "name": "language_model.model.layers.32.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 13472256 } ], "md5sum": "83a81aad514c2b42c3749bcbe10fabb3" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.32.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.32.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.32.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.32.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.32.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.32.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.32.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.32.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.32.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "e3b3f32fe46f267947df5f3f623f5d41" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.33.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "723278d79bc025df69bc3c4558575799" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "9d89bb66e30cae8f4428aa58d02555a2" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.32.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.32.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.32.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.32.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.33.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.33.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "1514c271696ff75d9728c3b4b3e266aa" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.33.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.33.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.33.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.33.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.33.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.33.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.33.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.33.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.33.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "928cfa5e9548fd7c718ce01f10122be1" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.34.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "fb3535faeac904886ca58c4d37c59de2" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "c755e467bcec11c396273dfb70c36eb6" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.33.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.33.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.33.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.33.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.34.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.34.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "6db2441faa5f75dadbafdb105c320df5" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.34.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.34.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.34.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.34.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.34.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.34.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.34.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.34.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.34.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "f362db76b97f968697f489fc576e6ada" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.35.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "ed727a7ab1a1742d83f05320b928837b" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "804246441834c774f258792374fef228" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.34.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.34.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.34.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.34.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.35.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.35.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "89f404bb745ec5d11d763dd5ed7eb7cc" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.35.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.35.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.35.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.35.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.35.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.35.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.35.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.35.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.35.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "b31666d6f92ef8e15dc81de4a2eeb806" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.36.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "169cf10628a3489a21da9ec8128a542c" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "10c16a755540e3b13a6058b180cdd96a" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.35.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.35.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.35.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.35.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.36.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.36.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "25b98c12e72d87e43efef7639b950130" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.36.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.36.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.36.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.36.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.36.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.36.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.36.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.36.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.36.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "b19d01deea4e904a76233d09c43089fa" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "89fb5d1e85b005a3beb6477cc0032c8f" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 33030400, "records": [ { "name": "language_model.model.layers.36.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.36.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.36.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.36.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 18579456 }, { "name": "language_model.model.layers.37.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33030144 } ], "md5sum": "9a2ea8b43f452a549b9f77d8b536d98e" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 30966016, "records": [ { "name": "language_model.model.layers.37.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 0 }, { "name": "language_model.model.layers.37.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 5505024 }, { "name": "language_model.model.layers.37.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 6193152 }, { "name": "language_model.model.layers.37.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 17203200 }, { "name": "language_model.model.layers.37.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 18579456 }, { "name": "language_model.model.layers.37.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 18579712 }, { "name": "language_model.model.layers.37.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 29589760 } ], "md5sum": "2972b77b2b0a1e73b6fb3e1b2676c504" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.37.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "fadce20f680e03543a9cc0a9131b0995" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.38.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "66c3d5b0139784db28fdfdea5942a7ba" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "f378701f4e6b767b8fab0dd0fbf838a7" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 20697600, "records": [ { "name": "language_model.model.layers.37.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 0 }, { "name": "language_model.model.layers.37.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 5505024 }, { "name": "language_model.model.layers.37.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 6193152 }, { "name": "language_model.model.layers.37.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 6203904 }, { "name": "language_model.model.layers.37.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13429248 }, { "name": "language_model.model.layers.37.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13440000 }, { "name": "language_model.model.layers.37.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13450752 }, { "name": "language_model.model.layers.38.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13461504 }, { "name": "language_model.model.layers.38.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 13472256 } ], "md5sum": "12ccfa3371c46433464158670549a734" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.38.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.38.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.38.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.38.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.38.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.38.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.38.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.38.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.38.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "ebf6bc2fe32d7273928746401f38bd4c" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.39.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "cd9daa6e7b151175cd44405ce09f16a6" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "9b26aaa9a98744c166247a0df0b5b2f8" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.38.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.38.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.38.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.38.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.39.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.39.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "b637e0e5794b04eaa0a4d7522b4e36f6" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.39.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.39.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.39.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.39.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.39.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.39.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.39.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.39.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.39.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "286613cfbeca51f43de7cc4c1573bab6" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.40.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "8743d609d6de31c72c3ae4e50b840948" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "363f90dde3a994d0c9d7e6186a86e3e0" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.39.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.39.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.39.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.39.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.40.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.40.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "f6ffd859a6401172c3e007b7bd2b0ce9" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.40.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.40.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.40.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.40.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.40.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.40.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.40.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.40.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.40.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "fe0f44a1b64b39be4db5525647eaa911" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.41.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "88315e45b6e98d72e1cff6c1c40ec6ca" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "93059cd7bbd9bbf486bd54c6f7bc8406" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.40.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.40.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.40.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.40.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.41.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.41.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "4ca9af3472a8e19a09fa0059efdcf827" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.41.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.41.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.41.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.41.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.41.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.41.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.41.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.41.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.41.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "f1810b498ed7ca15dd8e28544e616422" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.42.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "1a504ae4994c30b81d5d8434110550df" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "b1f37d581b6c3e7b0391ab4d5c4cee97" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.41.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.41.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.41.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.41.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.42.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.42.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "b1f4c3e0c5f22243be46ff162de86690" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.42.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.42.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.42.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.42.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.42.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.42.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.42.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.42.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.42.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "ec4528e6dd3572d166b1fc1c58c5dd1a" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "73b38038edf4fbd9da2bdc7f4f0695f7" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 33030400, "records": [ { "name": "language_model.model.layers.42.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.42.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.42.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.42.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 18579456 }, { "name": "language_model.model.layers.43.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33030144 } ], "md5sum": "9d9f5f19d93078f8b3e8b82b63721e7d" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 30966016, "records": [ { "name": "language_model.model.layers.43.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 0 }, { "name": "language_model.model.layers.43.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 5505024 }, { "name": "language_model.model.layers.43.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 6193152 }, { "name": "language_model.model.layers.43.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 17203200 }, { "name": "language_model.model.layers.43.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 18579456 }, { "name": "language_model.model.layers.43.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 18579712 }, { "name": "language_model.model.layers.43.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 29589760 } ], "md5sum": "2ed295249c916580e4c9b9808cfddf33" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.43.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "0676e592cc64e069be0f21acf75bb0e6" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.44.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "6cf61787e6e4127c734c607918b2fe10" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "3f19f17b0f1c7c6e6ae3690f0b7ed4f5" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 20697600, "records": [ { "name": "language_model.model.layers.43.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 0 }, { "name": "language_model.model.layers.43.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 5505024 }, { "name": "language_model.model.layers.43.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 6193152 }, { "name": "language_model.model.layers.43.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 6203904 }, { "name": "language_model.model.layers.43.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13429248 }, { "name": "language_model.model.layers.43.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13440000 }, { "name": "language_model.model.layers.43.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13450752 }, { "name": "language_model.model.layers.44.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13461504 }, { "name": "language_model.model.layers.44.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 13472256 } ], "md5sum": "b0d7cba9db90264e5a124b50e3b39e53" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.44.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.44.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.44.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.44.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.44.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.44.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.44.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.44.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.44.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "3dc956854725b9eaf8d5213e45427c22" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.45.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "17cc2a35fde189210b0ef45b1b1cb5a4" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "2d678f7fcf8698570e55dc5fca1c0fbb" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.44.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.44.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.44.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.44.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.45.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.45.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "71cd0dfef794383fa629f3de047a5615" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.45.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.45.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.45.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.45.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.45.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.45.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.45.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.45.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.45.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "ea0d4cbdcb252a0bcc01c5af2443f3e0" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.46.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "25f4b9b26444631da008f0f62fb54c4f" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "29addac02fa1e36465138fb2278ed21d" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.45.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.45.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.45.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.45.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.46.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.46.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "e3466875d54bded514765026e409ed4a" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.46.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.46.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.46.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.46.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.46.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.46.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.46.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.46.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.46.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "b4b1833df9cdd15ca0aef5c37e66b961" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.47.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "bb990e234173e195aa64dca3d89ec99c" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "f4401d85202153e730549dfd45e7210d" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.46.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.46.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.46.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.46.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.47.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.47.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "0ebeb52a488fc19f8380aef096f3ec64" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.47.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.47.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.47.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.47.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.47.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.47.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.47.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.47.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.47.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "dc70351ce9d34dfd75d28df4e64b8a0c" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.48.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "ecd277e66db1368de79d080bf8cb9bf1" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "be8b046dcc6810361a4e0952a86edb19" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.47.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.47.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.47.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.47.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.48.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.48.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "4fbc94f66d301caa634b5c5b1e337368" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.48.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.48.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.48.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.48.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.48.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.48.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.48.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.48.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.48.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "842a5df2cb765f6fff8d7211ebe8acad" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "4ae8385386e5057c01e31cc3598cc92b" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 33030400, "records": [ { "name": "language_model.model.layers.48.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.48.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.48.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.48.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 18579456 }, { "name": "language_model.model.layers.49.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33030144 } ], "md5sum": "81604745a7cfa8a8dd0fbaed963d486d" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 30966016, "records": [ { "name": "language_model.model.layers.49.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 0 }, { "name": "language_model.model.layers.49.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 5505024 }, { "name": "language_model.model.layers.49.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 6193152 }, { "name": "language_model.model.layers.49.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 17203200 }, { "name": "language_model.model.layers.49.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 18579456 }, { "name": "language_model.model.layers.49.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 18579712 }, { "name": "language_model.model.layers.49.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 29589760 } ], "md5sum": "30cac313a18e00f02b62f79cccccb06e" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.49.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "f7dfc0463ecc3c90967ae78fe157c6a7" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.50.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "2160df7828e4286c09b2c35eeb77ac73" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "cb39bb31aa5ec96b5113d60325ce8609" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 20697600, "records": [ { "name": "language_model.model.layers.49.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 0 }, { "name": "language_model.model.layers.49.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 5505024 }, { "name": "language_model.model.layers.49.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 6193152 }, { "name": "language_model.model.layers.49.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 6203904 }, { "name": "language_model.model.layers.49.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13429248 }, { "name": "language_model.model.layers.49.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13440000 }, { "name": "language_model.model.layers.49.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13450752 }, { "name": "language_model.model.layers.50.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13461504 }, { "name": "language_model.model.layers.50.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 13472256 } ], "md5sum": "d34403600609534844241d6fdd2ad273" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.50.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.50.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.50.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.50.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.50.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.50.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.50.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.50.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.50.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "69d4f71fab0e18a534799616c3cdbc50" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.51.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "c55e55e6074b6e17c861ef97d965f997" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "de7ffddf699d5f82131015f9061f92ab" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.50.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.50.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.50.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.50.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.51.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.51.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "d15f6944ad993ba7c9e24a296bf49c7b" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.51.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.51.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.51.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.51.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.51.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.51.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.51.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.51.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.51.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "a8f245723efa25ea433ece8c6ca40cd2" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.52.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "c5a086dbbcfc3e60e4eef1d0ef36e129" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "48fbcdc9a7e201c9bb591eb7e62c63e1" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.51.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.51.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.51.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.51.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.52.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.52.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "3bf58e3a2e63ae8656fbf1533df002cd" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.52.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.52.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.52.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.52.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.52.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.52.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.52.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.52.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.52.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "e93a21e346564e4bdcf0d5db01737ad7" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.53.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "3ad0cbfe48dffa6756fa60c19fba8447" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "d5490628a29ba6224d6d4d2020167492" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.52.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.52.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.52.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.52.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.53.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.53.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "8aeb8982b1337024de0d569b42d63446" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.53.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.53.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.53.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.53.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.53.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.53.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.53.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.53.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.53.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "750dcc050a48679bf61adc87d7a16bcf" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.54.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "a598acbd40ad6da71f4c81fde2b232bf" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "a4386ee655501790ec37c41718259ed9" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.53.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.53.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.53.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.53.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.54.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.54.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "7964ef3d6764004e0137872ad47b35ed" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.54.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.54.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.54.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.54.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.54.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.54.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.54.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.54.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.54.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "c0a44d5a386ce4e76116ba33b8fc993e" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "2a882babf0d472591cc6a1845d07158a" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 33030400, "records": [ { "name": "language_model.model.layers.54.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.54.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.54.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.54.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 18579456 }, { "name": "language_model.model.layers.55.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33030144 } ], "md5sum": "a76487fb220c833ede6e9041ba264355" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 30966016, "records": [ { "name": "language_model.model.layers.55.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 0 }, { "name": "language_model.model.layers.55.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 5505024 }, { "name": "language_model.model.layers.55.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 6193152 }, { "name": "language_model.model.layers.55.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 17203200 }, { "name": "language_model.model.layers.55.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 18579456 }, { "name": "language_model.model.layers.55.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 18579712 }, { "name": "language_model.model.layers.55.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 29589760 } ], "md5sum": "bdb00fc1c6590ef7e79b0acd6c41c3a9" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.55.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "c00a438d7286435d3cb515dee27a139c" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.56.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "7e2657bf39040f8aa6b3e498d7648109" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "6b74fc4067cc736167c726b9dd46d62f" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 20697600, "records": [ { "name": "language_model.model.layers.55.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 0 }, { "name": "language_model.model.layers.55.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 5505024 }, { "name": "language_model.model.layers.55.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 6193152 }, { "name": "language_model.model.layers.55.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 6203904 }, { "name": "language_model.model.layers.55.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13429248 }, { "name": "language_model.model.layers.55.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13440000 }, { "name": "language_model.model.layers.55.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13450752 }, { "name": "language_model.model.layers.56.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13461504 }, { "name": "language_model.model.layers.56.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 13472256 } ], "md5sum": "e6bc7bfe8d299d0215b7825ede92db31" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.56.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.56.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.56.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.56.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.56.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.56.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.56.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.56.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.56.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "4f85d1d284019555f447761e6dfd24c9" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.57.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "f25d3aa00e5bc38ec0d42497684e4f14" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "f2417d6805150d338207509994db4062" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.56.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.56.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.56.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.56.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.57.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.57.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "fd912e1fd426fdb751182770cf17bc08" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.57.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.57.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.57.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.57.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.57.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.57.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.57.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.57.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.57.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "e13ac3bfa1c79d113cfba3cda4648d57" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.58.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "51c3ad3624ec10ec743967adcbcb5c0a" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "4976ea049dae9221aaa620c796253bd7" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.57.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.57.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.57.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.57.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.58.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.58.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "879ef90bb41833da6211055183dc553a" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.58.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.58.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.58.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.58.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.58.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.58.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.58.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.58.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.58.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "a431634585402f536af49490ba2cdcae" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.59.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "6867dad8455ed171baebd96b742142ac" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "466437631b634907e57d8c449b6a3417" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.58.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.58.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.58.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.58.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.59.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.59.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "57db8bdf5d26bc003ee4dda2888d1450" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.59.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.59.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.59.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.59.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.59.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.59.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.59.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.59.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.59.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "06edd0b57631492093a96ef66e2b8284" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.60.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "5cf42b44092373d28a6087591a07e637" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "a04ebf037f9f57f1d1d004592d467d79" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 25815552, "records": [ { "name": "language_model.model.layers.59.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.59.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.59.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.59.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.60.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 18579456 }, { "name": "language_model.model.layers.60.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 18590208 } ], "md5sum": "2df113a0a04bac151a1cd5912fa388d6" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 33062912, "records": [ { "name": "language_model.model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 0 }, { "name": "language_model.model.layers.60.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14450688 }, { "name": "language_model.model.layers.60.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14461440 }, { "name": "language_model.model.layers.60.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 14472192 }, { "name": "language_model.model.layers.60.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14482944 }, { "name": "language_model.model.layers.60.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 14483200 }, { "name": "language_model.model.layers.60.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 19988224 }, { "name": "language_model.model.layers.60.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20676352 }, { "name": "language_model.model.layers.60.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 31686400 }, { "name": "language_model.model.layers.60.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33062656 } ], "md5sum": "9160a48e8055cbc9897a3f3ddd3f7678" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 115605504, "records": [ { "name": "language_model.model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 43008, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 115605504, "byteOffset": 0 } ], "md5sum": "a04218dc1117ff93e57654ef3d9307ea" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 33030400, "records": [ { "name": "language_model.model.layers.60.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "language_model.model.layers.60.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 11010048 }, { "name": "language_model.model.layers.60.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 12386304 }, { "name": "language_model.model.layers.60.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 17891328 }, { "name": "language_model.model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 43008, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14450688, "byteOffset": 18579456 }, { "name": "language_model.model.layers.61.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33030144 } ], "md5sum": "3b5e3e49cefa357975660222c98fb26f" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 30966016, "records": [ { "name": "language_model.model.layers.61.self_attn.k_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 0 }, { "name": "language_model.model.layers.61.self_attn.k_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 5505024 }, { "name": "language_model.model.layers.61.self_attn.o_proj.q_weight", "shape": [ 5376, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 6193152 }, { "name": "language_model.model.layers.61.self_attn.o_proj.q_scale", "shape": [ 5376, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 17203200 }, { "name": "language_model.model.layers.61.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 18579456 }, { "name": "language_model.model.layers.61.self_attn.q_proj.q_weight", "shape": [ 4096, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 18579712 }, { "name": "language_model.model.layers.61.self_attn.q_proj.q_scale", "shape": [ 4096, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1376256, "byteOffset": 29589760 } ], "md5sum": "68c3c8e72d3dac717d1e28a79ff3522c" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 57802752, "records": [ { "name": "language_model.model.layers.61.mlp.down_proj.q_weight", "shape": [ 5376, 2688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 57802752, "byteOffset": 0 } ], "md5sum": "c37f54e796d6b6924ed7471484417a23" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 13472256, "records": [ { "name": "language_model.model.layers.61.self_attn.v_proj.q_weight", "shape": [ 2048, 672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5505024, "byteOffset": 0 }, { "name": "language_model.model.layers.61.self_attn.v_proj.q_scale", "shape": [ 2048, 168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 688128, "byteOffset": 5505024 }, { "name": "language_model.model.layers.61.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 6193152 }, { "name": "language_model.model.layers.61.mlp.down_proj.q_scale", "shape": [ 5376, 672 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7225344, "byteOffset": 6203904 }, { "name": "language_model.model.layers.61.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13429248 }, { "name": "language_model.model.layers.61.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13440000 }, { "name": "language_model.model.layers.61.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13450752 }, { "name": "language_model.model.norm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 13461504 } ], "md5sum": "a8c016a4d492df9b0a5f29f63605317e" } ] }