diff --git a/mlc-chat-config.json b/mlc-chat-config.json index 15d47976ecaceeb1de6988b3c846ee946532e12a..ad7d95f6fad21907cf051e5e73e1d85a88e54852 100644 --- a/mlc-chat-config.json +++ b/mlc-chat-config.json @@ -45,7 +45,7 @@ "presence_penalty": 0.0, "frequency_penalty": 0.0, "repetition_penalty": 1.0, - "top_p": 1.0, + "top_p": 0.95, "tokenizer_files": [ "tokenizer.model", "tokenizer.json", diff --git a/ndarray-cache.json b/ndarray-cache.json new file mode 100644 index 0000000000000000000000000000000000000000..ef40b9638eae8e3c919f37c2293c404da118d7d4 --- /dev/null +++ b/ndarray-cache.json @@ -0,0 +1,13937 @@ +{ + "metadata": { + "ParamSize": 1119, + "ParamBytes": 15194704384.0, + "BitsPerParam": 4.431169188588552 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 704815104, + "records": [ + { + "name": "language_model.model.embed_tokens.q_weight", + "shape": [ + 262208, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 704815104, + "byteOffset": 0 + } + ], + "md5sum": "ce2814e6ad13ffb16e9be092acc67608" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 88101888, + "records": [ + { + "name": "language_model.model.embed_tokens.q_scale", + "shape": [ + 262208, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 88101888, + "byteOffset": 0 + } + ], + "md5sum": "c10f794772334957d727081ee5c0d299" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "5b48c2508c2396ce6eabf939681896d3" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "fc1725c3f75d44ff875e949b3413fc21" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 27912448, + "records": [ + { + "name": "language_model.model.layers.0.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 10752 + }, + { + "name": "language_model.model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 7236096 + }, + { + "name": "language_model.model.layers.0.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 21686784 + }, + { + "name": "language_model.model.layers.0.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 21697536 + }, + { + "name": "language_model.model.layers.0.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 21708288 + }, + { + "name": "language_model.model.layers.0.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21719040 + }, + { + "name": "language_model.model.layers.0.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 21719296 + }, + { + "name": "language_model.model.layers.0.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 27224320 + } + ], + "md5sum": "07f59632be8b4f9506530f4f3ad89943" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "54317f35fe466b792cd669bd51ef66e7" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 30966016, + "records": [ + { + "name": "language_model.model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.0.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.0.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 12386560 + }, + { + "name": "language_model.model.layers.0.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 23396608 + }, + { + "name": "language_model.model.layers.0.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 24772864 + }, + { + "name": "language_model.model.layers.0.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 30277888 + } + ], + "md5sum": "65b560f113a5889ab5737e7ee085a0e1" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 33030656, + "records": [ + { + "name": "language_model.model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.1.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.1.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14450944 + }, + { + "name": "language_model.model.layers.1.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19955968 + }, + { + "name": "language_model.model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20644096 + }, + { + "name": "language_model.model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31654144 + }, + { + "name": "language_model.model.layers.1.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33030400 + } + ], + "md5sum": "11532fd5d09218edef6ee6400c012638" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "bf545ad2d38a58507f4bdda133853b8c" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "7bfd21d2d186eb01bc3fa50e122e7f45" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "5a77430589afe193ffd6ff719a73c8d7" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 33083904, + "records": [ + { + "name": "language_model.model.layers.1.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.1.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.1.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.1.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.1.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + }, + { + "name": "language_model.model.layers.1.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 25815552 + }, + { + "name": "language_model.model.layers.1.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 25826304 + }, + { + "name": "language_model.model.layers.1.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 25837056 + }, + { + "name": "language_model.model.layers.2.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 25847808 + }, + { + "name": "language_model.model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 25858560 + } + ], + "md5sum": "02019fe5c639a22e86203eda1306f455" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.2.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.2.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.2.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.2.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.2.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.2.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.2.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "24c0ce83bffa9ce0a95146c422fc5e95" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "b9bba110767a481e26f5a6e9140631a0" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "8009ad539b84e00e6cc76e302b1fefd4" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.2.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.2.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.2.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.2.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.3.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "763d07a7c40f78eac1199ff9b46db202" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.3.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.3.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.3.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.3.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.3.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.3.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.3.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "40095892366b3c70529f9ed0227949c6" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "84fdb1e7e2645aa0b2b0f0b82a0e4d94" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "2f9ce05dfd130e5eec960e8e654ccb48" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.3.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.3.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.3.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.3.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.4.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "43380e6d803e0ce86ebe6de7371e4c8d" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.4.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.4.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.4.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.4.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.4.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.4.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.4.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "8d551cfc50600c07ba80cc347ac2c17d" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "c2e9be30d8f593767d96afe1b97c458a" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "278610971bc04cba4d1730336fb2ca0f" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.4.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.4.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.4.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.4.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.5.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "932ac959bf8583be6a7e38dac54bf5aa" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.5.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.5.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.5.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.5.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.5.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.5.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.5.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "73166be7515dbc5fff2fdc425deb55c3" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "d180bbbac4ff0ed3a3c6c4cccb55d127" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "81c7b44a8082e336553745672870a032" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.5.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.5.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.5.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.5.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.6.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "083140dd56a3b126908aa5cad226a053" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.6.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.6.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.6.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.6.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.6.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.6.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.6.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "53a847597572dad0e3736afeba1b7b29" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "07c172d76e0fa6eca7abcd067ea03ae6" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 33030400, + "records": [ + { + "name": "language_model.model.layers.6.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.6.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.6.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.6.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.7.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33030144 + } + ], + "md5sum": "36d8403abf889e69bdc71e4d7f5d1e0e" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 30966016, + "records": [ + { + "name": "language_model.model.layers.7.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.7.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 5505024 + }, + { + "name": "language_model.model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 6193152 + }, + { + "name": "language_model.model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 17203200 + }, + { + "name": "language_model.model.layers.7.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.7.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 18579712 + }, + { + "name": "language_model.model.layers.7.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 29589760 + } + ], + "md5sum": "61de45280f0df1d6b8364ca11aeeadd8" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "0ff64065136f16831a7261aebd556158" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "0226a67853aac9a9cb051a49f7a815bf" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 33417472, + "records": [ + { + "name": "language_model.model.layers.7.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.7.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 5505024 + }, + { + "name": "language_model.model.layers.10.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 6193152 + }, + { + "name": "language_model.model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 6203904 + }, + { + "name": "language_model.model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 13429248 + }, + { + "name": "language_model.model.layers.10.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 27879936 + }, + { + "name": "language_model.model.layers.10.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 27890688 + }, + { + "name": "language_model.model.layers.10.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 27901440 + }, + { + "name": "language_model.model.layers.10.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27912192 + }, + { + "name": "language_model.model.layers.10.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 27912448 + } + ], + "md5sum": "2b5417b5b44b9f4b5ee013a6d9a4b751" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "82886a55bdf64927bf177d9648f17f21" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 31664896, + "records": [ + { + "name": "language_model.model.layers.10.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 688128 + }, + { + "name": "language_model.model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11698176 + }, + { + "name": "language_model.model.layers.10.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 13074432 + }, + { + "name": "language_model.model.layers.10.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 13074688 + }, + { + "name": "language_model.model.layers.10.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 24084736 + }, + { + "name": "language_model.model.layers.10.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 25460992 + }, + { + "name": "language_model.model.layers.10.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 30966016 + }, + { + "name": "language_model.model.layers.11.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 31654144 + } + ], + "md5sum": "1d397d851964783c3b77b7cfe25b15fe" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "3bb1714570a24708b89de65710411d73" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 27901696, + "records": [ + { + "name": "language_model.model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 7225344 + }, + { + "name": "language_model.model.layers.11.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 21676032 + }, + { + "name": "language_model.model.layers.11.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 21686784 + }, + { + "name": "language_model.model.layers.11.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 21697536 + }, + { + "name": "language_model.model.layers.11.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21708288 + }, + { + "name": "language_model.model.layers.11.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 21708544 + }, + { + "name": "language_model.model.layers.11.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 27213568 + } + ], + "md5sum": "55f51b15151956e0855c4c47cc0fea9e" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "a0d6b425116cb41ab9e25f90035cdaac" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 30976768, + "records": [ + { + "name": "language_model.model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.11.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.11.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 12386560 + }, + { + "name": "language_model.model.layers.11.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 23396608 + }, + { + "name": "language_model.model.layers.11.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 24772864 + }, + { + "name": "language_model.model.layers.11.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 30277888 + }, + { + "name": "language_model.model.layers.12.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 30966016 + } + ], + "md5sum": "a1a5e09a8ddb4080e91612a69289d7a1" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "aa9a50e8c843aca97756a83aec6d63b0" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 27901696, + "records": [ + { + "name": "language_model.model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 7225344 + }, + { + "name": "language_model.model.layers.12.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 21676032 + }, + { + "name": "language_model.model.layers.12.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 21686784 + }, + { + "name": "language_model.model.layers.12.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 21697536 + }, + { + "name": "language_model.model.layers.12.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21708288 + }, + { + "name": "language_model.model.layers.12.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 21708544 + }, + { + "name": "language_model.model.layers.12.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 27213568 + } + ], + "md5sum": "952d36f2fa1ba790252fdd467285d453" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "727740a4b1a286630607f92f76d4e200" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 30966016, + "records": [ + { + "name": "language_model.model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.12.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.12.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 12386560 + }, + { + "name": "language_model.model.layers.12.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 23396608 + }, + { + "name": "language_model.model.layers.12.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 24772864 + }, + { + "name": "language_model.model.layers.12.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 30277888 + } + ], + "md5sum": "2e5084a9ee93c6abb52ded067aa63570" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 33030656, + "records": [ + { + "name": "language_model.model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.13.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.13.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14450944 + }, + { + "name": "language_model.model.layers.13.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19955968 + }, + { + "name": "language_model.model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20644096 + }, + { + "name": "language_model.model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31654144 + }, + { + "name": "language_model.model.layers.13.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33030400 + } + ], + "md5sum": "3689e7eed69014547d0587a35362bea3" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "851b5ae22bccf1d6aed1f7b63258d4db" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "98f94f743d053e028fa31cafe0b3d458" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "b59cc33571423072c7f24572b52a7047" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 33083904, + "records": [ + { + "name": "language_model.model.layers.13.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.13.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.13.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.13.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.7.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + }, + { + "name": "language_model.model.layers.7.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 25815552 + }, + { + "name": "language_model.model.layers.7.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 25826304 + }, + { + "name": "language_model.model.layers.7.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 25837056 + }, + { + "name": "language_model.model.layers.8.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 25847808 + }, + { + "name": "language_model.model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 25858560 + } + ], + "md5sum": "66cd1f7ae402ed72897644819f2e016e" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.8.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.8.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.8.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.8.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.8.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.8.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.8.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "644cd057aa0d1ee9ab9003d476b284d9" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "87884668919948fa2ae643a40e87b73b" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "ae5db6adf63059a8911f5946bd557495" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.8.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.8.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.8.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.8.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.9.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "94745003bb706cd74ca28ddff4edfd6a" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.9.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.9.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.9.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.9.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.9.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.9.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.9.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "d5ed5ca7255e9b8afad45ee6ea040e9e" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "1c143b62d992c05107fa48d51d155a79" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "35fd9692b413314801b97253c6ac2fd6" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "d5cefd8214d01efd4bf96260157ca5c9" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 33083904, + "records": [ + { + "name": "language_model.model.layers.9.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.9.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.9.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.9.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.13.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + }, + { + "name": "language_model.model.layers.13.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 25815552 + }, + { + "name": "language_model.model.layers.13.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 25826304 + }, + { + "name": "language_model.model.layers.13.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 25837056 + }, + { + "name": "language_model.model.layers.14.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 25847808 + }, + { + "name": "language_model.model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 25858560 + } + ], + "md5sum": "cf3dda84daf1d67ce666ee0106e7be07" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.14.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.14.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.14.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.14.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.14.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.14.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.14.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "898d49c55c013edc73fbc84c590c5a06" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "b45d944602219f0f0a76ec5abce48654" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "9771e7801a1eb2cc41bd9c8846e658ec" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.14.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.14.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.14.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.14.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.15.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "9e27cc105e4bc6481f6eba54cc802e38" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.15.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.15.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.15.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.15.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.15.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.15.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.15.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "9dd9c5b60bd8fe9a9035be4efcbe2400" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "8cbf8569643f9bfa1097735d9a02b9bb" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "0ec3090000de16b58ad6a252402eae62" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.15.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.15.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.15.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.15.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.16.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "3d9b2c0d877e2d853ba06658f2d6f73a" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.16.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.16.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.16.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.16.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.16.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.16.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.16.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "febcb9d652c406282fbd996da82ed2ff" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "3ab1e679e389d4563f1627a4317dcd4c" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "9fad36898904521293b91961779a61e1" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.16.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.16.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.16.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.16.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.17.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "b2b39114a4067a6209f61367fc12cf84" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.17.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.17.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.17.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.17.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.17.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.17.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.17.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "aebe3d6de11ba0bef20a721be1c9f3a1" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "48e5044620c7c64d0a3302c3eaa81f18" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "5e22c249e662527d327b149723136481" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.17.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.17.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.17.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.17.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.18.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "c2bb238d0d4e6b97424f221e1d578a3a" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.18.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.18.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.18.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.18.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.18.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.18.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.18.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "37a544057de9a22c5a5760f70f0d3877" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "6fb85c599ee41d796b1019774c3336cd" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 33030400, + "records": [ + { + "name": "language_model.model.layers.18.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.18.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.18.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.18.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.19.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33030144 + } + ], + "md5sum": "1f2fea5c7d545c86ddc27d784471af8a" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 30966016, + "records": [ + { + "name": "language_model.model.layers.19.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.19.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 5505024 + }, + { + "name": "language_model.model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 6193152 + }, + { + "name": "language_model.model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 17203200 + }, + { + "name": "language_model.model.layers.19.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.19.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 18579712 + }, + { + "name": "language_model.model.layers.19.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 29589760 + } + ], + "md5sum": "f2973c8db1afd236d911263aadf46270" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "0aeab62a1af8e88b7b12783444dc3ff0" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "466d8697b5e0732b844ea24ab9428289" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "24a803a183a62772c6843afbd3e5dade" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 20697600, + "records": [ + { + "name": "language_model.model.layers.19.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.19.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 5505024 + }, + { + "name": "language_model.model.layers.19.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 6193152 + }, + { + "name": "language_model.model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 6203904 + }, + { + "name": "language_model.model.layers.19.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13429248 + }, + { + "name": "language_model.model.layers.19.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13440000 + }, + { + "name": "language_model.model.layers.19.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13450752 + }, + { + "name": "language_model.model.layers.20.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13461504 + }, + { + "name": "language_model.model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 13472256 + } + ], + "md5sum": "d0c045c9ef71f22a608d4c9b167cc26f" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.20.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.20.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.20.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.20.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.20.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.20.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.20.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "cbe73dc1beb706b119c5913cad0163ac" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "4c5759ca2a2dbf0f4c906c6f0c6c523f" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "77031b704e8081f646fd5e686a3f51fc" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.20.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.20.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.20.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.20.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.21.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "a63fbbd65864481106bb70ae2dcf3a9a" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.21.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.21.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.21.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.21.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.21.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.21.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.21.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "dee546363c8974f4671362deff3cfc27" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "88975d9d2f7fe79a25fe7318abec4812" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "d9b1c90ea010e15bebd55e8788c9d2b5" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.21.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.21.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.21.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.21.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.22.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "bf4dfaec3dc9cc7f29e41d79962914d6" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.22.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.22.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.22.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.22.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.22.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.22.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.22.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "bf656d497c9eece6be8396c2125b509b" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "98bfdb79657c3484f9e321bcda60225d" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "df4c315ad60eec3ee190e4cdef211a24" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.22.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.22.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.22.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.22.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.23.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "0918e811f39d4033b42cbbcedf2132d5" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.23.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.23.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.23.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.23.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.23.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.23.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.23.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "fcad4dc38323a27a26ab1e5507f31130" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "0563bf484f4f1a6497cedc889b793425" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "0cc1c9bbe98ebc809b4dcd69fb171579" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.23.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.23.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.23.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.23.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.24.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "c8cfd223513fc2f0c85bc2c6c31a1f08" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.24.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.24.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.24.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.24.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.24.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.24.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.24.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "8bfb68df58b1d2f45268c701ccd3d650" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "53fc135a55bd9a0020ee393e9c48059b" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 33030400, + "records": [ + { + "name": "language_model.model.layers.24.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.24.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.24.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.24.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.25.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33030144 + } + ], + "md5sum": "cb72ef30844246e4bb3f12327e346f35" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 30966016, + "records": [ + { + "name": "language_model.model.layers.25.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.25.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 5505024 + }, + { + "name": "language_model.model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 6193152 + }, + { + "name": "language_model.model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 17203200 + }, + { + "name": "language_model.model.layers.25.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.25.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 18579712 + }, + { + "name": "language_model.model.layers.25.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 29589760 + } + ], + "md5sum": "ea582be6b6b04b11de0733365645356a" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "212f4a9e5bc0a005f078b7273206263f" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "f85a773fde84ffb7f5cf85a55e507ba9" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "70651fcf44153404007e94fba4c4dea6" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 20697600, + "records": [ + { + "name": "language_model.model.layers.25.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.25.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 5505024 + }, + { + "name": "language_model.model.layers.25.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 6193152 + }, + { + "name": "language_model.model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 6203904 + }, + { + "name": "language_model.model.layers.25.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13429248 + }, + { + "name": "language_model.model.layers.25.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13440000 + }, + { + "name": "language_model.model.layers.25.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13450752 + }, + { + "name": "language_model.model.layers.26.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13461504 + }, + { + "name": "language_model.model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 13472256 + } + ], + "md5sum": "66eafbbba60f16a2a62f8289c43411fd" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.26.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.26.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.26.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.26.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.26.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.26.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.26.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "6ac5f32b304543dceae6e908495738f8" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "0861afc7f6f591722735834ecd9569f3" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "1392801a8c0d0dac0762488c17a81b78" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.26.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.26.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.26.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.26.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.27.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "550c6213f72892b8eaf5cf515313ea45" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.27.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.27.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.27.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.27.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.27.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.27.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.27.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "971db357830ce7718143c01ade29c951" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "eda69062c96e13f66a9d7f4975bcd2b7" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "eb7262f84f25be610c1499c83b1c6621" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.27.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.27.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.27.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.27.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.28.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "011807a6688dd0c354b1c63d8aff4904" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.28.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.28.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.28.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.28.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.28.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.28.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.28.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "33f258f019bfeb7e3d8530639974595c" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "614aa2fec2937f160d2177803249e410" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "6985c68dc56eceffa519af9b6d0cb2c7" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.28.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.28.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.28.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.28.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.29.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "acb201e1d7166e7a86caa10c1581bc21" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.29.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.29.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.29.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.29.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.29.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.29.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.29.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "472a3682657aeadb73c2301bd5d0e897" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "1c885a1c804885b9ad133b6434ab7aa0" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "a277f07dc45be2fecfc012e1b7f3ba22" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.29.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.29.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.29.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.29.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.30.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "1ec1b8ae77a7135041770f592a47f551" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.30.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.30.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.30.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.30.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.30.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.30.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.30.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "9b573c68c830bd2ad5335c01e1f0725c" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "3997ccf5187da28fa04bbc972a73d31b" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 33030400, + "records": [ + { + "name": "language_model.model.layers.30.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.30.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.30.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.30.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.31.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33030144 + } + ], + "md5sum": "a6d6f8ab0f49f7eb02800ccba4318444" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 30966016, + "records": [ + { + "name": "language_model.model.layers.31.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.31.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 5505024 + }, + { + "name": "language_model.model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 6193152 + }, + { + "name": "language_model.model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 17203200 + }, + { + "name": "language_model.model.layers.31.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.31.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 18579712 + }, + { + "name": "language_model.model.layers.31.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 29589760 + } + ], + "md5sum": "5958b211c85160b553a7a53934a6333e" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "6f8e9c6c11c77c0bde0eb97e2016f206" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "ac9945fcac23e4728d8cc388b4a5165f" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "6f77942eaa1dd384bc0526264a14a5dc" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 20697600, + "records": [ + { + "name": "language_model.model.layers.31.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.31.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 5505024 + }, + { + "name": "language_model.model.layers.31.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 6193152 + }, + { + "name": "language_model.model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 6203904 + }, + { + "name": "language_model.model.layers.31.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13429248 + }, + { + "name": "language_model.model.layers.31.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13440000 + }, + { + "name": "language_model.model.layers.31.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13450752 + }, + { + "name": "language_model.model.layers.32.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13461504 + }, + { + "name": "language_model.model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 13472256 + } + ], + "md5sum": "c6bb950695e3a35f167b703df98b4821" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.32.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.32.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.32.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.32.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.32.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.32.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.32.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "0e7a54abc559d04c65d86de28eccbf9f" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "3f5d2337b7e0fcbb3029906d297d0527" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "b85f7ede4f459b5f68b321ccc47b86c0" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.32.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.32.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.32.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.32.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.33.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "aa34761916afea2c01f13e694f57e6b7" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.33.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.33.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.33.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.33.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.33.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.33.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.33.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "94fbce6c82539e68e6d64eb78a1eb46e" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "21894cd6aa9b267acd92610e8b2dfac3" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "eca2a3fa3b2208ff1f7541f21f6a1965" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.33.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.33.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.33.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.33.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.34.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "ee683ee945454d2031768f36df82dde0" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.34.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.34.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.34.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.34.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.34.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.34.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.34.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "9850dee24bb0effec62e3847db616a8d" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "391c2502c1c61c4f43bee079055869b7" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "3719829644565c62ab1df8a2a535ed6f" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.34.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.34.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.34.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.34.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.35.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "febc817ede03e6265d0a9da9c4f7c664" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.35.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.35.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.35.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.35.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.35.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.35.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.35.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "40dd982c0dd58a1534bddcd98ecb0f2c" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "d31214892a13390d3efc055818101e0f" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "fffb913b9234bec2a09a1aab2fbff1cf" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.35.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.35.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.35.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.35.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.36.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "4df417050e94e50e9b6b46f801b731fd" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.36.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.36.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.36.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.36.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.36.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.36.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.36.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "aff8c5fbba05f4d4f8ba7e3a20f5a978" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "99bd2f215755c2b8532337a122f8d9c4" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 33030400, + "records": [ + { + "name": "language_model.model.layers.36.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.36.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.36.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.36.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.37.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33030144 + } + ], + "md5sum": "7b5258f91b752859dd825751c2574f72" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 30966016, + "records": [ + { + "name": "language_model.model.layers.37.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.37.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 5505024 + }, + { + "name": "language_model.model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 6193152 + }, + { + "name": "language_model.model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 17203200 + }, + { + "name": "language_model.model.layers.37.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.37.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 18579712 + }, + { + "name": "language_model.model.layers.37.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 29589760 + } + ], + "md5sum": "c508253b8869330be58f29ca6fdce514" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "ce51857e7915eb169a4e132dce16db74" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "1d4913a1c170ae141fe52efa59011cd0" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "fdaa3ddcc186df0d28f89c4b3d42b3c0" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 20697600, + "records": [ + { + "name": "language_model.model.layers.37.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.37.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 5505024 + }, + { + "name": "language_model.model.layers.37.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 6193152 + }, + { + "name": "language_model.model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 6203904 + }, + { + "name": "language_model.model.layers.37.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13429248 + }, + { + "name": "language_model.model.layers.37.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13440000 + }, + { + "name": "language_model.model.layers.37.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13450752 + }, + { + "name": "language_model.model.layers.38.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13461504 + }, + { + "name": "language_model.model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 13472256 + } + ], + "md5sum": "0c63bd608cb308a8d8e5e3ca347be215" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.38.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.38.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.38.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.38.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.38.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.38.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.38.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "fd3ff952f9dd9f5b320e11c5e7e53259" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "e6aee39ab69fd3ac1481182fad1a22c1" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "4d9d99e8ae1640df7a66782521841b3f" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.38.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.38.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.38.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.38.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.39.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "f7f0f259a4cc617d7bdb7b4a4189ba9e" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.39.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.39.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.39.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.39.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.39.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.39.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.39.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "48f626fc53e868be0abc124d15cced55" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "6c4256351bedd91069ebb91c58bd5d4e" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "d3b42d89f97974581354177e53c3a1f5" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.39.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.39.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.39.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.39.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.40.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "422d17a450d78b04e6e264ed885ed9c9" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.40.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.40.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.40.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.40.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.40.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.40.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.40.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "dc5767ab9a5374b85d352bced016b3da" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "9339b395c802fd7b52be555a831ec038" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "49e276dfa6709273e8cadb55a77ae8a7" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.40.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.40.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.40.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.40.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.41.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "d257ad6d990c53063789faec1fc084a4" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.41.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.41.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.41.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.41.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.41.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.41.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.41.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "498dcf98049d412731894dd56a5f2a98" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.42.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "39f6dba8a2fcf4b589170c14eb6836e8" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.42.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "5175de7d44e7a62b91de62fb14e25cc2" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.41.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.41.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.41.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.41.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.42.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.42.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "f6298a2f84dd7b4f7ce46c6a4ced01b8" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.42.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.42.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.42.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.42.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.42.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.42.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.42.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.42.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.42.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.42.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "e14d0739df802073a0154da2ec9cea87" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.43.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "80e6bf39d456ad25b393059bfc2e47fb" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 33030400, + "records": [ + { + "name": "language_model.model.layers.42.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.42.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.42.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.42.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.43.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.43.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33030144 + } + ], + "md5sum": "9ffc23316c147971d0fab13f7259fc85" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 30966016, + "records": [ + { + "name": "language_model.model.layers.43.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.43.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 5505024 + }, + { + "name": "language_model.model.layers.43.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 6193152 + }, + { + "name": "language_model.model.layers.43.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 17203200 + }, + { + "name": "language_model.model.layers.43.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.43.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 18579712 + }, + { + "name": "language_model.model.layers.43.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 29589760 + } + ], + "md5sum": "c0546e1e960775158c3f1e7212826a9f" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.43.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "845d12a30658d0e7881919a717049390" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.44.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "e8ae707ecb4eefb7ad071413a5fa740a" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.44.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "ea60cd0a40fc90e21642928052139380" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 20697600, + "records": [ + { + "name": "language_model.model.layers.43.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.43.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 5505024 + }, + { + "name": "language_model.model.layers.43.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 6193152 + }, + { + "name": "language_model.model.layers.43.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 6203904 + }, + { + "name": "language_model.model.layers.43.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13429248 + }, + { + "name": "language_model.model.layers.43.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13440000 + }, + { + "name": "language_model.model.layers.43.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13450752 + }, + { + "name": "language_model.model.layers.44.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13461504 + }, + { + "name": "language_model.model.layers.44.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 13472256 + } + ], + "md5sum": "aa6ca6895b563f3101dc385b717df518" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.44.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.44.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.44.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.44.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.44.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.44.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.44.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.44.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.44.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.44.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "8e9e29bf49c452e4c4d1708996db8b6d" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.45.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "468bdb5e83a59d5b806ac0adcb7d94bd" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.45.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "75622421fe022ada28ef4209a1a998ef" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.44.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.44.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.44.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.44.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.45.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.45.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "fb30c259e685bd35a11cfc1af3a1e036" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.45.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.45.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.45.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.45.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.45.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.45.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.45.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.45.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.45.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.45.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "760d251c0b43c859002d2c44f8e334d0" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.46.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "bcdfe174dc5a6c88418d9733d011d3f3" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.46.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "6e0c591ffea38ba2bb0e0884737cfbc2" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.45.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.45.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.45.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.45.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.46.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.46.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "2b1af07cf2cbd4f503b3dfbac96b86db" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.46.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.46.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.46.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.46.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.46.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.46.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.46.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.46.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.46.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.46.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "d5b67fcc1673604954d76b52a9558254" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.47.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "a4ecc468b6b742c09be2bae9098519ad" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.47.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "5124b45c07c83143ae4e303aa77ee429" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.46.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.46.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.46.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.46.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.47.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.47.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "795ca27de121362d985761171bd137f3" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.47.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.47.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.47.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.47.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.47.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.47.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.47.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.47.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.47.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.47.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "da4065081a1cf050690fccd5fa481910" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.48.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "f1d107f206740e473fd46d8ebc75578b" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.48.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "58dcc218b0cdab204ef3d93407a1875c" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.47.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.47.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.47.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.47.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.48.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.48.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "70611e7d8f9edfb6162b83315dddf4c7" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.48.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.48.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.48.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.48.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.48.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.48.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.48.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.48.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.48.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.48.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "aaf98ae5af41a274e39d2c06ed7b5e66" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.49.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "04a33b4875d9530fa94ca2d387077c1b" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 33030400, + "records": [ + { + "name": "language_model.model.layers.48.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.48.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.48.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.48.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.49.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.49.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33030144 + } + ], + "md5sum": "8a439272512ede5d303f17e0b6f8238f" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 30966016, + "records": [ + { + "name": "language_model.model.layers.49.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.49.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 5505024 + }, + { + "name": "language_model.model.layers.49.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 6193152 + }, + { + "name": "language_model.model.layers.49.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 17203200 + }, + { + "name": "language_model.model.layers.49.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.49.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 18579712 + }, + { + "name": "language_model.model.layers.49.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 29589760 + } + ], + "md5sum": "0f4a9645385f96a90435624fa2343618" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.49.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "ed53bc1ca1a3fe2247d30b7afaaf1219" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.50.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "545144c211b1502cf846a11ee4352b69" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.50.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "a8f839233baeb4fc59eb4f6a09478909" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 20697600, + "records": [ + { + "name": "language_model.model.layers.49.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.49.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 5505024 + }, + { + "name": "language_model.model.layers.49.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 6193152 + }, + { + "name": "language_model.model.layers.49.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 6203904 + }, + { + "name": "language_model.model.layers.49.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13429248 + }, + { + "name": "language_model.model.layers.49.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13440000 + }, + { + "name": "language_model.model.layers.49.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13450752 + }, + { + "name": "language_model.model.layers.50.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13461504 + }, + { + "name": "language_model.model.layers.50.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 13472256 + } + ], + "md5sum": "d5d23346d6e6df27598ebf6c8cdde238" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.50.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.50.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.50.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.50.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.50.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.50.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.50.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.50.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.50.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.50.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "a90145555684455cce13688483ebf895" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.51.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "732acccf71005cd4a40062e89395ae54" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.51.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "de5355f8aa5e6c1c6c70265a6ef8b037" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.50.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.50.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.50.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.50.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.51.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.51.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "8b0500f2ca0875a67cce1187dcee17da" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.51.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.51.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.51.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.51.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.51.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.51.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.51.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.51.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.51.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.51.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "bd2d9143ba0a926122087455fb634874" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.52.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "c8af90da8b09c92d271890f9cd7be1b1" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.52.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "5eefaef12f6d1e677c200634d92b2f5e" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.51.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.51.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.51.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.51.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.52.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.52.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "9c90bb226e329a375d7d48af5f43ae0c" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.52.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.52.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.52.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.52.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.52.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.52.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.52.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.52.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.52.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.52.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "db600c564b14228685f840f50d666b08" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.53.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "5b74c3417087b9a61fe145805d492ae2" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.53.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "edf486e5ef6240e1b79892a448082362" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.52.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.52.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.52.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.52.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.53.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.53.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "65cf4cee5b338a76e596e7a32915aca7" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.53.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.53.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.53.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.53.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.53.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.53.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.53.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.53.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.53.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.53.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "26d9be568e7b41bfcc19b6ee01610097" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.54.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "044076017b8d0d0367a07ada211cdf8a" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.54.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "2356bfb6385a70676d19ea8ea00b9010" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.53.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.53.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.53.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.53.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.54.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.54.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "9c64787b3d509a0cf1b5e0497a586a29" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.54.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.54.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.54.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.54.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.54.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.54.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.54.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.54.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.54.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.54.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "dede5b344cbcda8fdd0f6515b205bb77" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.55.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "8ce7a6357e16d8d1f2b5039b78b3db64" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 33030400, + "records": [ + { + "name": "language_model.model.layers.54.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.54.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.54.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.54.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.55.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.55.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33030144 + } + ], + "md5sum": "fe4ff356745c1f54a6916675ab316d2c" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 30966016, + "records": [ + { + "name": "language_model.model.layers.55.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.55.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 5505024 + }, + { + "name": "language_model.model.layers.55.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 6193152 + }, + { + "name": "language_model.model.layers.55.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 17203200 + }, + { + "name": "language_model.model.layers.55.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.55.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 18579712 + }, + { + "name": "language_model.model.layers.55.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 29589760 + } + ], + "md5sum": "196f3e30ec592576f5a0aa658ef594cc" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.55.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "9dc92e8f579b79eaf10dd4f528e9b85b" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.56.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "bbd52bd0bd0af25315ab1aabf78e1c47" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.56.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "d9b0b12c2aafa9787172ede81df2b994" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 20697600, + "records": [ + { + "name": "language_model.model.layers.55.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.55.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 5505024 + }, + { + "name": "language_model.model.layers.55.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 6193152 + }, + { + "name": "language_model.model.layers.55.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 6203904 + }, + { + "name": "language_model.model.layers.55.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13429248 + }, + { + "name": "language_model.model.layers.55.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13440000 + }, + { + "name": "language_model.model.layers.55.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13450752 + }, + { + "name": "language_model.model.layers.56.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13461504 + }, + { + "name": "language_model.model.layers.56.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 13472256 + } + ], + "md5sum": "a9d215ce712eed7c3fccec6cd035eec0" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.56.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.56.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.56.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.56.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.56.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.56.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.56.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.56.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.56.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.56.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "ed58c47835637cdb207f3d76ff483359" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.57.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "c0c87121baf8b43b43f204e946070138" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.57.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "c80d83918d0c4f7064b1c3c5d5e1f13e" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.56.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.56.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.56.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.56.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.57.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.57.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "169a62e14177e62c8f3b680271261e72" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.57.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.57.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.57.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.57.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.57.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.57.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.57.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.57.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.57.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.57.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "f119b1e303a5b22546ae5f058d667e57" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.58.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "30c27de69f19134d1a0718dd2989ff86" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.58.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "4342fa0688242fd2064ffb47c2f388d1" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.57.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.57.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.57.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.57.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.58.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.58.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "b069324bf17c8b927d4c888e0264cc57" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.58.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.58.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.58.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.58.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.58.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.58.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.58.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.58.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.58.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.58.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "e17785f082402b6bef8a41800d976a07" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.59.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "48175271c106b28fc74e8effe12e07ca" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.59.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "914f3b9c108fe4e11c29b5121ac0fa44" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.58.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.58.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.58.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.58.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.59.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.59.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "a9884760d4956b13e9940c2d76e66dc2" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.59.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.59.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.59.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.59.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.59.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.59.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.59.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.59.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.59.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.59.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "357d802159e45b8fb5ea8f57b6dd7dc7" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.60.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "acf947d3b6562df95b7070a93d6072c6" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.60.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "4c3291401d5204bcb3b26d295b790673" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 25815552, + "records": [ + { + "name": "language_model.model.layers.59.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.59.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.59.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.59.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.60.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.60.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 18590208 + } + ], + "md5sum": "f8ad1366447eb6dae65935574b9a8684" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 33062912, + "records": [ + { + "name": "language_model.model.layers.60.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.60.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14450688 + }, + { + "name": "language_model.model.layers.60.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14461440 + }, + { + "name": "language_model.model.layers.60.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 14472192 + }, + { + "name": "language_model.model.layers.60.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14482944 + }, + { + "name": "language_model.model.layers.60.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 14483200 + }, + { + "name": "language_model.model.layers.60.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 19988224 + }, + { + "name": "language_model.model.layers.60.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20676352 + }, + { + "name": "language_model.model.layers.60.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 31686400 + }, + { + "name": "language_model.model.layers.60.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33062656 + } + ], + "md5sum": "829f635f1657662c07c79e600e097cda" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 115605504, + "records": [ + { + "name": "language_model.model.layers.61.mlp.gate_up_proj.q_weight", + "shape": [ + 43008, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 115605504, + "byteOffset": 0 + } + ], + "md5sum": "7b43c30db99247637693be5ba1dc8370" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 33030400, + "records": [ + { + "name": "language_model.model.layers.60.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.60.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 11010048 + }, + { + "name": "language_model.model.layers.60.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 12386304 + }, + { + "name": "language_model.model.layers.60.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 17891328 + }, + { + "name": "language_model.model.layers.61.mlp.gate_up_proj.q_scale", + "shape": [ + 43008, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 14450688, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.61.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33030144 + } + ], + "md5sum": "380cac5823f2839c10d7fdf6c4468ab5" + }, + { + "dataPath": "params_shard_246.bin", + "format": "raw-shard", + "nbytes": 30966016, + "records": [ + { + "name": "language_model.model.layers.61.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.61.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 5505024 + }, + { + "name": "language_model.model.layers.61.self_attn.o_proj.q_weight", + "shape": [ + 5376, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 6193152 + }, + { + "name": "language_model.model.layers.61.self_attn.o_proj.q_scale", + "shape": [ + 5376, + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 17203200 + }, + { + "name": "language_model.model.layers.61.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 18579456 + }, + { + "name": "language_model.model.layers.61.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 18579712 + }, + { + "name": "language_model.model.layers.61.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1376256, + "byteOffset": 29589760 + } + ], + "md5sum": "2e38cb2b517009324b94b1f789e58704" + }, + { + "dataPath": "params_shard_247.bin", + "format": "raw-shard", + "nbytes": 57802752, + "records": [ + { + "name": "language_model.model.layers.61.mlp.down_proj.q_weight", + "shape": [ + 5376, + 2688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 57802752, + "byteOffset": 0 + } + ], + "md5sum": "17d7ec7223d34969a110b3955c970963" + }, + { + "dataPath": "params_shard_248.bin", + "format": "raw-shard", + "nbytes": 13472256, + "records": [ + { + "name": "language_model.model.layers.61.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.61.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 168 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 5505024 + }, + { + "name": "language_model.model.layers.61.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 6193152 + }, + { + "name": "language_model.model.layers.61.mlp.down_proj.q_scale", + "shape": [ + 5376, + 672 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 7225344, + "byteOffset": 6203904 + }, + { + "name": "language_model.model.layers.61.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13429248 + }, + { + "name": "language_model.model.layers.61.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13440000 + }, + { + "name": "language_model.model.layers.61.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13450752 + }, + { + "name": "language_model.model.norm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 13461504 + } + ], + "md5sum": "09898a89d7a7659814165771c0c7c48b" + } + ] +} \ No newline at end of file diff --git a/params_shard_0.bin b/params_shard_0.bin new file mode 100644 index 0000000000000000000000000000000000000000..6b24cda97931b5628e33a8694d31f59be4cb729e --- /dev/null +++ b/params_shard_0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8979721e881d865eb83b21d1a2120d5f655107ae3358dcd1fa1ee1f67231ce1c +size 704815104 diff --git a/params_shard_1.bin b/params_shard_1.bin new file mode 100644 index 0000000000000000000000000000000000000000..14d327ad85e1f0c71ad35f961339e6621a059680 --- /dev/null +++ b/params_shard_1.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18123265c4e613e721f7ec214ddd128fd6524cf0f2ee2ea033a8c0d58a442720 +size 88101888 diff --git a/params_shard_10.bin b/params_shard_10.bin new file mode 100644 index 0000000000000000000000000000000000000000..cb44098cb7098a36924c8782a9cb2a4fd31ef238 --- /dev/null +++ b/params_shard_10.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a32a305ef9211ca06c79cc9396d4ddab4c81b08f59f3d4bff41e026ab46780c +size 115605504 diff --git a/params_shard_100.bin b/params_shard_100.bin new file mode 100644 index 0000000000000000000000000000000000000000..87d5400885e0fe95ac7f1e8bd857b1c614835c8d --- /dev/null +++ b/params_shard_100.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b2dbffcc729d77c35c1af1e396b9c2ea6b7343384f45d9d96cf65dd4ae7fe2f +size 115605504 diff --git a/params_shard_101.bin b/params_shard_101.bin new file mode 100644 index 0000000000000000000000000000000000000000..842950a303b39617804b5639e6ac01e2f5f48be2 --- /dev/null +++ b/params_shard_101.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66fde79b7a0f46b2623008d8cf1433a251760c6c22b01e9c316b9279976cb840 +size 33030400 diff --git a/params_shard_102.bin b/params_shard_102.bin new file mode 100644 index 0000000000000000000000000000000000000000..a53c7e3ea298866779c33690bfb5beaaf25244d5 --- /dev/null +++ b/params_shard_102.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d7e7d12bbc5a9e034796cf8a9cddb9c734aa44ce48ac9c4345a480aa81d4fb0 +size 30966016 diff --git a/params_shard_103.bin b/params_shard_103.bin new file mode 100644 index 0000000000000000000000000000000000000000..b06aee1bfaa69ce1edcb821981131b9fcf906b61 --- /dev/null +++ b/params_shard_103.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9759c8438db461f10e82a321d17bbeee774d75e338302c311404428c36c280fe +size 57802752 diff --git a/params_shard_104.bin b/params_shard_104.bin new file mode 100644 index 0000000000000000000000000000000000000000..52f6d70cab67423a834bc2397aa3cd44b05c2326 --- /dev/null +++ b/params_shard_104.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1011dd0bd6c770f8cc11f17568db5441ff0ab5b24338dc9233a0460af0dddd24 +size 57802752 diff --git a/params_shard_105.bin b/params_shard_105.bin new file mode 100644 index 0000000000000000000000000000000000000000..a1adbf918073e23a4e915dc548eeb64406f250c8 --- /dev/null +++ b/params_shard_105.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7511e882c51bca01cc31b93911be1b336280123d947d14eb120138867bb920c2 +size 115605504 diff --git a/params_shard_106.bin b/params_shard_106.bin new file mode 100644 index 0000000000000000000000000000000000000000..d8ce5167600480bb8d60b864e9ed8139c713ed17 --- /dev/null +++ b/params_shard_106.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f823bdf08cb1c26de886f8baea5307020597bd67b59f4a5ede3f3e44940bf46 +size 20697600 diff --git a/params_shard_107.bin b/params_shard_107.bin new file mode 100644 index 0000000000000000000000000000000000000000..da85372c40bfcebd1f3a18d024be1b87f1c20adc --- /dev/null +++ b/params_shard_107.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:334bcfebb33209f807345bf24a765d7888cac3e9f91080b3f55dccdc3d4485c0 +size 33062912 diff --git a/params_shard_108.bin b/params_shard_108.bin new file mode 100644 index 0000000000000000000000000000000000000000..7c3bd637bd1d359b23117d5add8bd6611e4479b6 --- /dev/null +++ b/params_shard_108.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6462dca9665218426131f929c5c88892e2c04bdbc3baae4ce02d55b95687115 +size 57802752 diff --git a/params_shard_109.bin b/params_shard_109.bin new file mode 100644 index 0000000000000000000000000000000000000000..08dc6f9f172e8a21c461544734b10d912d0c7ae6 --- /dev/null +++ b/params_shard_109.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55ee5368e80670290b3e4b4dd5dc2a89b33992bbfaacc9476ad96b0631357f63 +size 115605504 diff --git a/params_shard_11.bin b/params_shard_11.bin new file mode 100644 index 0000000000000000000000000000000000000000..1adc4319c4b35092a1b4a8fd07e1cae59efecd6c --- /dev/null +++ b/params_shard_11.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d5170ee5775d200264cb39964496c59d5db422f95ce992cb4b0edbc77959e45 +size 33083904 diff --git a/params_shard_110.bin b/params_shard_110.bin new file mode 100644 index 0000000000000000000000000000000000000000..8d3465b64e8b707ce6c62f2053bc226fff1c2464 --- /dev/null +++ b/params_shard_110.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:005ed4013dea9b15f8d441ab8f6fb7a5195832a0080470d4bfc7add8e5d8d92e +size 25815552 diff --git a/params_shard_111.bin b/params_shard_111.bin new file mode 100644 index 0000000000000000000000000000000000000000..24f91bb9c282a60b9e58f429628c5975f4e0936f --- /dev/null +++ b/params_shard_111.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2c44d521200c79a22c8b1f7096304589b98ad08e0aa50c79c7f3be5aa1ec787 +size 33062912 diff --git a/params_shard_112.bin b/params_shard_112.bin new file mode 100644 index 0000000000000000000000000000000000000000..7fc0ccca5fbc68fd1d7ab9873142f3d174e1c54c --- /dev/null +++ b/params_shard_112.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dfea17d67f22d72c23fc0ad8a74132fbf06b5f480c0b8d0516565a650621495 +size 57802752 diff --git a/params_shard_113.bin b/params_shard_113.bin new file mode 100644 index 0000000000000000000000000000000000000000..7a996a3c8af97f136f53cb465ec79c6a474d238f --- /dev/null +++ b/params_shard_113.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a23bf263ec3d83ea37e25804a09830d99d638db2bf712a37838048c52f8ff8d5 +size 115605504 diff --git a/params_shard_114.bin b/params_shard_114.bin new file mode 100644 index 0000000000000000000000000000000000000000..ba1df36a813b4b705cadf7001ae151bab31aa3c0 --- /dev/null +++ b/params_shard_114.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ce59e7872ae231b9956ff80a51427fc854d8a47b5e24e9f9438639b6106d197 +size 25815552 diff --git a/params_shard_115.bin b/params_shard_115.bin new file mode 100644 index 0000000000000000000000000000000000000000..99c418bb74d84dfa38f1a6a600a14f0534149886 --- /dev/null +++ b/params_shard_115.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df5885293f682be63f35b7d0db793c6076d8101afb01322eeaee6ae6cb2f42c2 +size 33062912 diff --git a/params_shard_116.bin b/params_shard_116.bin new file mode 100644 index 0000000000000000000000000000000000000000..7f404b01018cdbf8e36e4191cd2e921cafd6514e --- /dev/null +++ b/params_shard_116.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f07e194468f28c74618d5d03e21ac722d3b1bfbf089d3cde833fc050dfb09b6d +size 57802752 diff --git a/params_shard_117.bin b/params_shard_117.bin new file mode 100644 index 0000000000000000000000000000000000000000..9adb797de722bb0c6632bdb95bfa53e8b2312b1e --- /dev/null +++ b/params_shard_117.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:681d02fd744b56975ab95a52028bcb2abdc78564aab0e3cd445d94acba9feb20 +size 115605504 diff --git a/params_shard_118.bin b/params_shard_118.bin new file mode 100644 index 0000000000000000000000000000000000000000..d09f00244f2293b16a9677fd29afcc08eec24462 --- /dev/null +++ b/params_shard_118.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fe42cb29a71a89a19de29c49de313cd4ceee1991d52f3139f01365fe4c8b11b +size 25815552 diff --git a/params_shard_119.bin b/params_shard_119.bin new file mode 100644 index 0000000000000000000000000000000000000000..f96241984bd99c3e2f36ce046a2456d88930e995 --- /dev/null +++ b/params_shard_119.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2ba853039f6d4b503eccc1fafa671976c4abf04ba6a72d48aed82fc2d390e7f +size 33062912 diff --git a/params_shard_12.bin b/params_shard_12.bin new file mode 100644 index 0000000000000000000000000000000000000000..273455ef61c560116666b3f3f728a3dd756b030d --- /dev/null +++ b/params_shard_12.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da8ad37be9437e40aeb205ec70aa4bf349b5f4bf63df36fd344063bd3421ff67 +size 33062912 diff --git a/params_shard_120.bin b/params_shard_120.bin new file mode 100644 index 0000000000000000000000000000000000000000..eefa18491b1bdfe4257aa28f8034f83bc37ed0e8 --- /dev/null +++ b/params_shard_120.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ce1c6eb9451207d5cd730a3b8575afbf1fbeab616b92dad6d38c1171c3fee61 +size 57802752 diff --git a/params_shard_121.bin b/params_shard_121.bin new file mode 100644 index 0000000000000000000000000000000000000000..56da2cb5fed13520ecb8c1aea2fbdba69dcf84a9 --- /dev/null +++ b/params_shard_121.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6720f40e5154c91e4f35e62b4bd2c4c669608f1a63d148b43fd4c3640e49c4b5 +size 115605504 diff --git a/params_shard_122.bin b/params_shard_122.bin new file mode 100644 index 0000000000000000000000000000000000000000..a14380373ad03434d370c62868764def00e3add7 --- /dev/null +++ b/params_shard_122.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6b7401811de92b931c1d55e22463c74bc430ba9efbceda7329b8fb73a4a4daa +size 25815552 diff --git a/params_shard_123.bin b/params_shard_123.bin new file mode 100644 index 0000000000000000000000000000000000000000..8c88dbf2ca93ba41a7bb29262dbcdde0f0631f79 --- /dev/null +++ b/params_shard_123.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:181824fb0a1d8d04c9a726902b7febc2910e6215995f8970b1f07e08d05493f5 +size 33062912 diff --git a/params_shard_124.bin b/params_shard_124.bin new file mode 100644 index 0000000000000000000000000000000000000000..d0bac4193a32430aa36048f420bb1f852198f1a2 --- /dev/null +++ b/params_shard_124.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7d4eaac6b6d0842ff6a5b08792a344e4d9a3fafa08168e20b5e7c4e7c92f170 +size 115605504 diff --git a/params_shard_125.bin b/params_shard_125.bin new file mode 100644 index 0000000000000000000000000000000000000000..de9b4aabf1c20cc3e3f01f28971b867fc2c0aff0 --- /dev/null +++ b/params_shard_125.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5af3396cf0a961ffc621adc4b2b5d53dbdd4e5ff77f8e1213469603fac3f5e02 +size 33030400 diff --git a/params_shard_126.bin b/params_shard_126.bin new file mode 100644 index 0000000000000000000000000000000000000000..6bc12058124a8aac7fdbb5f817af8b3fc5928a87 --- /dev/null +++ b/params_shard_126.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcc6a363b62d347aa2949bf912a9ad50bd2eb91e97711d3c36ffbed8302abbd9 +size 30966016 diff --git a/params_shard_127.bin b/params_shard_127.bin new file mode 100644 index 0000000000000000000000000000000000000000..54bc3862d398b84227204c7682559e00b06806ed --- /dev/null +++ b/params_shard_127.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:608a9df732629ec18e1ed69435c53134df3e389b948b336f5276353907aec950 +size 57802752 diff --git a/params_shard_128.bin b/params_shard_128.bin new file mode 100644 index 0000000000000000000000000000000000000000..70b8bf44dd258f1527ce6d3435c72087bb75ebac --- /dev/null +++ b/params_shard_128.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ede2c55c5fa6cd6114e23430718206a0e168ccda09c2fa3c0fa24f2551d445ee +size 57802752 diff --git a/params_shard_129.bin b/params_shard_129.bin new file mode 100644 index 0000000000000000000000000000000000000000..2f3308e9204c44b385c4f4dcd6845616202307ba --- /dev/null +++ b/params_shard_129.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c14430bca9d8b126c29f4e4cf6085947396775e47dfb1f7d1624f75db238875 +size 115605504 diff --git a/params_shard_13.bin b/params_shard_13.bin new file mode 100644 index 0000000000000000000000000000000000000000..5f92b265e1d28dda2e293b12c90661e58bd8e3a4 --- /dev/null +++ b/params_shard_13.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52c32b6c7d08013cb52977113c7c276440bd1e3342605ce20c06be5ba40f640a +size 57802752 diff --git a/params_shard_130.bin b/params_shard_130.bin new file mode 100644 index 0000000000000000000000000000000000000000..da74354521f82d35ce9449157278956adf56c10f --- /dev/null +++ b/params_shard_130.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd34f4ff2b186813cc8e191867d3dd047ea560011e0cfee2b4c22fff1030106d +size 20697600 diff --git a/params_shard_131.bin b/params_shard_131.bin new file mode 100644 index 0000000000000000000000000000000000000000..5754dd222298cffa7b093d25413191a63a31eff4 --- /dev/null +++ b/params_shard_131.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:098f49e064349704b67101c03f01db7db93e4b440dd56dd3470c7e66f2bc9297 +size 33062912 diff --git a/params_shard_132.bin b/params_shard_132.bin new file mode 100644 index 0000000000000000000000000000000000000000..fe5cfde6376965fb6c468c8ccdef9a6b11b101f2 --- /dev/null +++ b/params_shard_132.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8739064a6536b8519738651d2fb92c1918830e867b4f51e2e3db858168c3767 +size 57802752 diff --git a/params_shard_133.bin b/params_shard_133.bin new file mode 100644 index 0000000000000000000000000000000000000000..39a4deafcf8ceb764568aacc27b575418b1616b6 --- /dev/null +++ b/params_shard_133.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1528d6f05ffc66e4da144a543a0c7a26f7bdbace0c048f200fe0af4bc96b8efd +size 115605504 diff --git a/params_shard_134.bin b/params_shard_134.bin new file mode 100644 index 0000000000000000000000000000000000000000..7657ee80b5c4690d7b2f6ad3271034f929e3cce2 --- /dev/null +++ b/params_shard_134.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4198b4d387a462da405a3a04e2bea4c7f7765c9ce55a30212be15a908271a360 +size 25815552 diff --git a/params_shard_135.bin b/params_shard_135.bin new file mode 100644 index 0000000000000000000000000000000000000000..df33ee9ecbb01056e1087e9c7ad514d02033880c --- /dev/null +++ b/params_shard_135.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbf37fa81dcc174907c32a81e7343f07ca9f77658cea4e7f0f84c0bfe5bfbbdf +size 33062912 diff --git a/params_shard_136.bin b/params_shard_136.bin new file mode 100644 index 0000000000000000000000000000000000000000..660a9bb8853bc6d93de8dab3d307fe650af3990f --- /dev/null +++ b/params_shard_136.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6edc35984a2c84b0c5413e95909578f76d41c0d83f5b27d4ad87f151ff6c723a +size 57802752 diff --git a/params_shard_137.bin b/params_shard_137.bin new file mode 100644 index 0000000000000000000000000000000000000000..289cc04b6824e781a0981a87aaf037881f7a79a1 --- /dev/null +++ b/params_shard_137.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db68c696a5c3c959d957789e8ab89cc3a227120d0e1bbb379bcf8c3d4523bd94 +size 115605504 diff --git a/params_shard_138.bin b/params_shard_138.bin new file mode 100644 index 0000000000000000000000000000000000000000..d10592160de8d4bdbbc2bbc0d690e7090bee9b04 --- /dev/null +++ b/params_shard_138.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a0cba33e9d04bc529761572ef78b26ed5c02eef43375635e91340c9b203df65 +size 25815552 diff --git a/params_shard_139.bin b/params_shard_139.bin new file mode 100644 index 0000000000000000000000000000000000000000..81ca62fbd0c636bb8e91cd70200e4d97fd5254d2 --- /dev/null +++ b/params_shard_139.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5009e27295637a7216269d28019841a6613420520d30cb9198970b927d43ec08 +size 33062912 diff --git a/params_shard_14.bin b/params_shard_14.bin new file mode 100644 index 0000000000000000000000000000000000000000..32f7a848ffddb947fdc514fd11d428880c12daac --- /dev/null +++ b/params_shard_14.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c64fa489afc396612df9a62360da9600c9dfbf7d0bd97050320019f17d8550e +size 115605504 diff --git a/params_shard_140.bin b/params_shard_140.bin new file mode 100644 index 0000000000000000000000000000000000000000..023fd71266a7eb52b5ef1c282cb6ff1421461faf --- /dev/null +++ b/params_shard_140.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7aee0f8216373ddbedcfdb93563bf2ddb9d14173f351c2890f2dfb947f94937 +size 57802752 diff --git a/params_shard_141.bin b/params_shard_141.bin new file mode 100644 index 0000000000000000000000000000000000000000..2fa2a49c9da617d7d3aa2f8b25860f563e4b6115 --- /dev/null +++ b/params_shard_141.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:153831338156e5ea1da9059a1ec0a20ca87bff0c7588350891874eb0b1cf2330 +size 115605504 diff --git a/params_shard_142.bin b/params_shard_142.bin new file mode 100644 index 0000000000000000000000000000000000000000..676fceb540eb34d3e8ff78c8be37784269793bb6 --- /dev/null +++ b/params_shard_142.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37585173584a1d4d2100276f7bc8f8f23722a9cdff90324192e1a6fc87011868 +size 25815552 diff --git a/params_shard_143.bin b/params_shard_143.bin new file mode 100644 index 0000000000000000000000000000000000000000..0c491e2575b7e26df7fcdf8345991a0fac79250e --- /dev/null +++ b/params_shard_143.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:342e7ade3560af55e8487352b1677ed9a0f6d01a159c989e7f70dad40e21bac3 +size 33062912 diff --git a/params_shard_144.bin b/params_shard_144.bin new file mode 100644 index 0000000000000000000000000000000000000000..190c36c035f8ce319b01b08fd835e092b4e18e4d --- /dev/null +++ b/params_shard_144.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d442c2c641461f3c85b55af1ccfc1dcb420940ef2872c94ee68f07947a6d2431 +size 57802752 diff --git a/params_shard_145.bin b/params_shard_145.bin new file mode 100644 index 0000000000000000000000000000000000000000..e91344da94d55b99dfe4cb70aea4ae53de6553ee --- /dev/null +++ b/params_shard_145.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36af59a098494376b8cf15133b5c03f0036556634802b1ea366769563818bb35 +size 115605504 diff --git a/params_shard_146.bin b/params_shard_146.bin new file mode 100644 index 0000000000000000000000000000000000000000..2b132bfbcc099a9910798d04e49a340710f4c988 --- /dev/null +++ b/params_shard_146.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6600de610bd99b1326238f93595e388d8576ce343225a511e03177cdac315317 +size 25815552 diff --git a/params_shard_147.bin b/params_shard_147.bin new file mode 100644 index 0000000000000000000000000000000000000000..468e8842b786b42c00660c50ba7cc1683a848d75 --- /dev/null +++ b/params_shard_147.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d503d7676992dcbbafe53463df169a3766ff4c7662add1441b62c38d375b7883 +size 33062912 diff --git a/params_shard_148.bin b/params_shard_148.bin new file mode 100644 index 0000000000000000000000000000000000000000..fea3c640c753ad716f330f0413a056497b527ffe --- /dev/null +++ b/params_shard_148.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:115bf5c5d915cba793b8b1aaa0c7d3105a7acac917d7e5bfb98f830a6b4edfb9 +size 115605504 diff --git a/params_shard_149.bin b/params_shard_149.bin new file mode 100644 index 0000000000000000000000000000000000000000..3de9218ee3edad1f1950f88a1d28072c0398b06f --- /dev/null +++ b/params_shard_149.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6eda16ad930963ae732c12083431f332e4f70c261a5bc26d5f69a7c14031bbe +size 33030400 diff --git a/params_shard_15.bin b/params_shard_15.bin new file mode 100644 index 0000000000000000000000000000000000000000..8997e70a8604aec24f827c8c1e6001a647700a9a --- /dev/null +++ b/params_shard_15.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:231e715ffe8523d5c5506a2bdb9f7a8445947c2f1a198d2998243e3d20009bf3 +size 25815552 diff --git a/params_shard_150.bin b/params_shard_150.bin new file mode 100644 index 0000000000000000000000000000000000000000..6f71fb0af57f3429e4738f10dff9b720bff923e0 --- /dev/null +++ b/params_shard_150.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9ed6e32e1dfe1cc04065a3b3029fcedd84611e747ae11e7d65bb04e9722e4ba +size 30966016 diff --git a/params_shard_151.bin b/params_shard_151.bin new file mode 100644 index 0000000000000000000000000000000000000000..f866e99ce8de2b11e96dbd4a25c3eb8e0a100868 --- /dev/null +++ b/params_shard_151.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e727105f8bb9bbf551061c9bf59b8851c895c79032309f449bb57395a8e69ba +size 57802752 diff --git a/params_shard_152.bin b/params_shard_152.bin new file mode 100644 index 0000000000000000000000000000000000000000..b6d9ce7434215c4f31a6eed9e3ef5bfa408ffd09 --- /dev/null +++ b/params_shard_152.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11b4bed66875d3e137cea52494d1ff7d4bd8a0f954363f62c247e1abf5a17f68 +size 57802752 diff --git a/params_shard_153.bin b/params_shard_153.bin new file mode 100644 index 0000000000000000000000000000000000000000..8da9f9041c27f1389f570158e0b3a0e501347ae6 --- /dev/null +++ b/params_shard_153.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4abf54349a0d24ce65d658b76e2f4c44926f6b361a52832326b54261f3ce061 +size 115605504 diff --git a/params_shard_154.bin b/params_shard_154.bin new file mode 100644 index 0000000000000000000000000000000000000000..10ef64552e9dba788dd696b7f9b8ecd24d952611 --- /dev/null +++ b/params_shard_154.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b4912dcb982cebd70194c10e9783b7803aebe23b345b9748410d57e9eb1d4bd +size 20697600 diff --git a/params_shard_155.bin b/params_shard_155.bin new file mode 100644 index 0000000000000000000000000000000000000000..c6a2a8e6e3c4bfbd8c7e84e1d1c8d7e82e40e8a5 --- /dev/null +++ b/params_shard_155.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:736a6c9cb2677d02a942dc5d5540c95e2a1ed280526ef5d69fc21cb015ae699b +size 33062912 diff --git a/params_shard_156.bin b/params_shard_156.bin new file mode 100644 index 0000000000000000000000000000000000000000..a7162058d76effb5a0698df6127fae2c1e4c3fd5 --- /dev/null +++ b/params_shard_156.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18f1131ca2ee40fb83ab033cb637758d61663f426e02d2f7402c936c085e517d +size 57802752 diff --git a/params_shard_157.bin b/params_shard_157.bin new file mode 100644 index 0000000000000000000000000000000000000000..d70c7346199b62729a174b9d068511d55b13cad3 --- /dev/null +++ b/params_shard_157.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f32001eeabcf9f0fc39e124854d17ccc90bd278332ff5b1c119fd47bad60d201 +size 115605504 diff --git a/params_shard_158.bin b/params_shard_158.bin new file mode 100644 index 0000000000000000000000000000000000000000..46df392d3ff1eafd3e2b609c7d41b293cbf803f8 --- /dev/null +++ b/params_shard_158.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd7ab0283d4eec31df454e11a6a06a6906a28ddd2068e876ede7985d22eb6550 +size 25815552 diff --git a/params_shard_159.bin b/params_shard_159.bin new file mode 100644 index 0000000000000000000000000000000000000000..044d9cd984ba4c589eeff04bdf412b531728f763 --- /dev/null +++ b/params_shard_159.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5866bd62755c33ad05a897bdcb38097db17f3c2e5a717b26a18fab58ef4121ef +size 33062912 diff --git a/params_shard_16.bin b/params_shard_16.bin new file mode 100644 index 0000000000000000000000000000000000000000..42867561223df76636048e0eff978a477a28f604 --- /dev/null +++ b/params_shard_16.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a28cec9f408bf35497036d2a473e974e1cc966b2f35857e5fcbee853e63d064b +size 33062912 diff --git a/params_shard_160.bin b/params_shard_160.bin new file mode 100644 index 0000000000000000000000000000000000000000..0ce13a4d4e7bbfc0543b5485db8f39a8d1da1f5a --- /dev/null +++ b/params_shard_160.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:481670751a657da41976649906ed736e7118349358d7efd510527ec8879cb6f1 +size 57802752 diff --git a/params_shard_161.bin b/params_shard_161.bin new file mode 100644 index 0000000000000000000000000000000000000000..65aac4ec7562656ea916a5a1da8418e34a0c4942 --- /dev/null +++ b/params_shard_161.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22d7970e9a28156f5c728824c01b45155c8962d6db8f364793f8dc3d30e2bdeb +size 115605504 diff --git a/params_shard_162.bin b/params_shard_162.bin new file mode 100644 index 0000000000000000000000000000000000000000..6aa15536cd2a17043925a4a50b53d7ae409dbe9a --- /dev/null +++ b/params_shard_162.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0ef1a9098da75252e5eff95872bdd889a31a70b21369be38b89e3d956a39a10 +size 25815552 diff --git a/params_shard_163.bin b/params_shard_163.bin new file mode 100644 index 0000000000000000000000000000000000000000..017873f293c13047a534023360ff21ff918c75b0 --- /dev/null +++ b/params_shard_163.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf04c2fa55c35f57d6d01cbb5afcd8a779a0e29467b6d8ca72a0fb9e95edc526 +size 33062912 diff --git a/params_shard_164.bin b/params_shard_164.bin new file mode 100644 index 0000000000000000000000000000000000000000..b4fbd82eba56344a8436a0ca2ad0ba5d8917ac31 --- /dev/null +++ b/params_shard_164.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:406b96a0ae67c6a39323f0c96c194d858ace75c923fb82d1af9b4bd0897106df +size 57802752 diff --git a/params_shard_165.bin b/params_shard_165.bin new file mode 100644 index 0000000000000000000000000000000000000000..64aa0a980da9757fe1325a85e07162c28d6e93ef --- /dev/null +++ b/params_shard_165.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f35612182f2c696b748f07aa52344024aad5269f794bc79de6712905eec6dd59 +size 115605504 diff --git a/params_shard_166.bin b/params_shard_166.bin new file mode 100644 index 0000000000000000000000000000000000000000..798ae4ab313b6ba14da917a686a4d35e11198da6 --- /dev/null +++ b/params_shard_166.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8d6d6f866cb6342ac3e022a9fa55ff44aa7a1236bb995b3fefb179288f8b1a7 +size 25815552 diff --git a/params_shard_167.bin b/params_shard_167.bin new file mode 100644 index 0000000000000000000000000000000000000000..487d0133b9e968e8dc7f42481486c043d725a48a --- /dev/null +++ b/params_shard_167.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b9a09620b98fc4372837536609a8ca370bf085f3cc8e308c3d58b912a44ad6a +size 33062912 diff --git a/params_shard_168.bin b/params_shard_168.bin new file mode 100644 index 0000000000000000000000000000000000000000..172ebb353aa2807d7ec6b278fad73ba5520f3733 --- /dev/null +++ b/params_shard_168.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faaf72c2a3029d42f18ad3d008225a1c7f9231a3404dbdcfdb21af0a288d4019 +size 57802752 diff --git a/params_shard_169.bin b/params_shard_169.bin new file mode 100644 index 0000000000000000000000000000000000000000..c209bc85d388e6d735818e5c1b00a059298563d1 --- /dev/null +++ b/params_shard_169.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a93e4d69e09cad6d41925ad3e8428689a6a20f52f39d1b294fb3f4a9db1be8e +size 115605504 diff --git a/params_shard_17.bin b/params_shard_17.bin new file mode 100644 index 0000000000000000000000000000000000000000..7c1adf0939fb2e2cf930fc969e4516ca8b0c2725 --- /dev/null +++ b/params_shard_17.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fd36ed679ccb7379f79f97981cccdfba1ac2ff7e1237d267aeb4907a2ecb3e6 +size 57802752 diff --git a/params_shard_170.bin b/params_shard_170.bin new file mode 100644 index 0000000000000000000000000000000000000000..59dde0e53dd32c41f96551edf78336977f5b1723 --- /dev/null +++ b/params_shard_170.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:960a80ba63e8a4653d95a9c36ee72522e7f6a58a322225c17ab9b48d1970dcf8 +size 25815552 diff --git a/params_shard_171.bin b/params_shard_171.bin new file mode 100644 index 0000000000000000000000000000000000000000..81e520bd5a6c8611afaec0a46683a6aeb1585832 --- /dev/null +++ b/params_shard_171.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:764f37fd5cf2bc0deff1bbb87c9a035edca6bd5739a56fb039a7dabd8a9f267e +size 33062912 diff --git a/params_shard_172.bin b/params_shard_172.bin new file mode 100644 index 0000000000000000000000000000000000000000..8a3f9a28ff8d9502c13d837e8a16c46de654c2f6 --- /dev/null +++ b/params_shard_172.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34faf9d963c05cdfbba1bb02fc06ee7c05d0b47792d18d1d49cb345ad8624c19 +size 115605504 diff --git a/params_shard_173.bin b/params_shard_173.bin new file mode 100644 index 0000000000000000000000000000000000000000..6501c5b1631fa44f111ac615e3a91bb40c00da6a --- /dev/null +++ b/params_shard_173.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa373dba1f8d10540e849ec84a9c5613cd0ab2d0297513442084451b7349fb22 +size 33030400 diff --git a/params_shard_174.bin b/params_shard_174.bin new file mode 100644 index 0000000000000000000000000000000000000000..be5e05044b0a5f72751cb9a73a1e0276c3874c7e --- /dev/null +++ b/params_shard_174.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc0f48a2862ffb832509beb17911769c1790b11339ac37fa17d32cabcea6eb90 +size 30966016 diff --git a/params_shard_175.bin b/params_shard_175.bin new file mode 100644 index 0000000000000000000000000000000000000000..1689f6fa7e723b64ef3c86ba078712178fc3a2dc --- /dev/null +++ b/params_shard_175.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:807736d77302762b5485be3cb83bfa71e71086987c26564ac31d595f84e95c32 +size 57802752 diff --git a/params_shard_176.bin b/params_shard_176.bin new file mode 100644 index 0000000000000000000000000000000000000000..166e9a19d695acda6c9e077f9f339da0e0ff6503 --- /dev/null +++ b/params_shard_176.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:353522361d8d342d8d576776406fb74c560bb76e85a1ac895765f8f5ea387bb6 +size 57802752 diff --git a/params_shard_177.bin b/params_shard_177.bin new file mode 100644 index 0000000000000000000000000000000000000000..9f501064c61a3a65c505898ec595cede0849a67f --- /dev/null +++ b/params_shard_177.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2290c1f76dd28c4fd1f7b8e55aedaada333e717530e7ceada0bfcf853d76034f +size 115605504 diff --git a/params_shard_178.bin b/params_shard_178.bin new file mode 100644 index 0000000000000000000000000000000000000000..b5db7e4b1088455f69517cde900665eb50f17425 --- /dev/null +++ b/params_shard_178.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd116d16922b715a810dab6208fedebe1aa59ceac2bb310895f929fec41654f8 +size 20697600 diff --git a/params_shard_179.bin b/params_shard_179.bin new file mode 100644 index 0000000000000000000000000000000000000000..55dca6a35adac1d5924247eab1f4cd0eaea3b46a --- /dev/null +++ b/params_shard_179.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff418b2ef1f91b71eb44dd84ae7b615216b419fbcf2d9904556c83deae83b63e +size 33062912 diff --git a/params_shard_18.bin b/params_shard_18.bin new file mode 100644 index 0000000000000000000000000000000000000000..fe4e5ae4d9c1b0d2d6defe111551a8cd81473174 --- /dev/null +++ b/params_shard_18.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f81614b51e848e9472886863d009c6fb6b8830170df6a75a7a44aa28fc9264ea +size 115605504 diff --git a/params_shard_180.bin b/params_shard_180.bin new file mode 100644 index 0000000000000000000000000000000000000000..85549eca9311e8876004994ec93d0795f76eae33 --- /dev/null +++ b/params_shard_180.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df8e44691d86e7c1bf990d75ac8c00ce51acf16e8d7b61e6f637fd4955740327 +size 57802752 diff --git a/params_shard_181.bin b/params_shard_181.bin new file mode 100644 index 0000000000000000000000000000000000000000..79d791fb231df77017595a9469a95d28a9fd539a --- /dev/null +++ b/params_shard_181.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2415c2efc81493471c32555a627782672230e0d08d34cc02691ea783960a4393 +size 115605504 diff --git a/params_shard_182.bin b/params_shard_182.bin new file mode 100644 index 0000000000000000000000000000000000000000..f992be9ea7566a1bf7ea5172589da80ce2dc01b3 --- /dev/null +++ b/params_shard_182.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b776efacc84a000be51dcf029577a4273029bbd399ee3e5a53732b1a315a507a +size 25815552 diff --git a/params_shard_183.bin b/params_shard_183.bin new file mode 100644 index 0000000000000000000000000000000000000000..80da75112338273ad268d9c0cad2d97fd7f8a3d1 --- /dev/null +++ b/params_shard_183.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04999e2b6196e0521a0b6f445b5933481c518cd9e59637affbc6d80e6c2fc096 +size 33062912 diff --git a/params_shard_184.bin b/params_shard_184.bin new file mode 100644 index 0000000000000000000000000000000000000000..98290df18b03ba8aba4239eee9e483af8fe74cad --- /dev/null +++ b/params_shard_184.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a168e5a2e2ced46e60a8a47cae4e22be254ac5e78ade1b95be8452ce49adbc1 +size 57802752 diff --git a/params_shard_185.bin b/params_shard_185.bin new file mode 100644 index 0000000000000000000000000000000000000000..35f100e16d5a505b66bf8a0fdff58aa844d054b8 --- /dev/null +++ b/params_shard_185.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e650f91068a376dd2d3808795fe2fb6c457f5462aca265909cb14ebf39278b06 +size 115605504 diff --git a/params_shard_186.bin b/params_shard_186.bin new file mode 100644 index 0000000000000000000000000000000000000000..444ecf3353fcf6a3491805d8463500bb03045421 --- /dev/null +++ b/params_shard_186.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:156b2515ee98c2d4f51e5b4a5a27f298682f04a853459a3dd782e090f2dff9bc +size 25815552 diff --git a/params_shard_187.bin b/params_shard_187.bin new file mode 100644 index 0000000000000000000000000000000000000000..2d47564b030bec7c70f460d238733ddaa2434b73 --- /dev/null +++ b/params_shard_187.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57863b3871f8a09cd0f00b05587891074b161ced0e9817434692aeaf52a3d9de +size 33062912 diff --git a/params_shard_188.bin b/params_shard_188.bin new file mode 100644 index 0000000000000000000000000000000000000000..278c39ed5b8e8b6f2ff40d76489d650ef983e6f2 --- /dev/null +++ b/params_shard_188.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82cc07dd6d3966a00a23bc5a014dc9bb8239ec886c2e837fd9053c674aca987c +size 57802752 diff --git a/params_shard_189.bin b/params_shard_189.bin new file mode 100644 index 0000000000000000000000000000000000000000..1fd61175b285893e134cc3ad197a57d5045f268e --- /dev/null +++ b/params_shard_189.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb9d71afde2503b2d66683086d9adb6bfac844f90365e41c8e06f5a49250a3e6 +size 115605504 diff --git a/params_shard_19.bin b/params_shard_19.bin new file mode 100644 index 0000000000000000000000000000000000000000..f1814fdc09874ae3a67fe986b82394a6596d4e6e --- /dev/null +++ b/params_shard_19.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b66b80f84c592339fd762cb9a517e2b6ff7217baedf755d05d0fef24d680bc7b +size 25815552 diff --git a/params_shard_190.bin b/params_shard_190.bin new file mode 100644 index 0000000000000000000000000000000000000000..4db0836cb503dfe62ab7fe25858311b649b604d4 --- /dev/null +++ b/params_shard_190.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf8f184dd592f75828d54ea234564e2614534a6899e5d13055f520f4ef618f47 +size 25815552 diff --git a/params_shard_191.bin b/params_shard_191.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f3dd64cc78a3365e6d946ed8538951afccb15e6 --- /dev/null +++ b/params_shard_191.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d24292665cd66070f7f5fa97ddd0c9d18a97cb2576f713d83dc8867b6688fb20 +size 33062912 diff --git a/params_shard_192.bin b/params_shard_192.bin new file mode 100644 index 0000000000000000000000000000000000000000..b21e48d1073d1ecae1e6d177ba97d0f3b19f93d0 --- /dev/null +++ b/params_shard_192.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ae1a4dc9fbf492140415e250e4c0107100fdd83ddf39d2a6ab9bc60b2841f44 +size 57802752 diff --git a/params_shard_193.bin b/params_shard_193.bin new file mode 100644 index 0000000000000000000000000000000000000000..c1b1218a7ef3ba4d40a06af6c2f880eec5466946 --- /dev/null +++ b/params_shard_193.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de6a3b1d6bf1437247c0020f239211717e7c96269ca05fdd69743abbcf049e2b +size 115605504 diff --git a/params_shard_194.bin b/params_shard_194.bin new file mode 100644 index 0000000000000000000000000000000000000000..64c98cebe6de2e94a283a109e2591ab59d26c756 --- /dev/null +++ b/params_shard_194.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93fd02c875b2a35009f8a55d63496daa105a1e641f813c5a0268c9608d77afbb +size 25815552 diff --git a/params_shard_195.bin b/params_shard_195.bin new file mode 100644 index 0000000000000000000000000000000000000000..6b86cad90f61f7bfc211d4daf5578837867b7360 --- /dev/null +++ b/params_shard_195.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1b5d9a48f8c5a0b742a01b1e815df6b97d52906022b070260ace08cd9df54fd +size 33062912 diff --git a/params_shard_196.bin b/params_shard_196.bin new file mode 100644 index 0000000000000000000000000000000000000000..51dd66bdbd2d62f0487383932d2149e56680b699 --- /dev/null +++ b/params_shard_196.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38ddeef77716a19cf68593861a4fa83ec316d23c3ab449154bc3dec218065964 +size 115605504 diff --git a/params_shard_197.bin b/params_shard_197.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8a51d54521856656349e3c707ef474db602723b --- /dev/null +++ b/params_shard_197.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e5658023e27a3c166e608e42d46f60d4b4cf35e107c82852619c4dde27f8dcc +size 33030400 diff --git a/params_shard_198.bin b/params_shard_198.bin new file mode 100644 index 0000000000000000000000000000000000000000..4304584c102cb4dcec22f114c93bc71f9b5bbcf0 --- /dev/null +++ b/params_shard_198.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33c96e65487ae4753a2163bcd6542e0d4db773dc129bf4557e092c48515978e7 +size 30966016 diff --git a/params_shard_199.bin b/params_shard_199.bin new file mode 100644 index 0000000000000000000000000000000000000000..8aa011450b5169a4d93bf24295c311a1ea28ccd7 --- /dev/null +++ b/params_shard_199.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3254622a67bbf7b558731001976e616cdd7c0c6618719c7d4a92ff7847c00955 +size 57802752 diff --git a/params_shard_2.bin b/params_shard_2.bin new file mode 100644 index 0000000000000000000000000000000000000000..fbc37da001f3750f73de8fd554b42a5d38add24e --- /dev/null +++ b/params_shard_2.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f6b34f92f829cd1d23c5baaae1ed0109ab8afcd05f387a7275c619b53d57171 +size 57802752 diff --git a/params_shard_20.bin b/params_shard_20.bin new file mode 100644 index 0000000000000000000000000000000000000000..115019c2fd7b44c3466b4155a460805878249776 --- /dev/null +++ b/params_shard_20.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:939fcb5bd699d7dbaa2ca0a9944cad999977502b73e017a464fdf5a1d4df7cfe +size 33062912 diff --git a/params_shard_200.bin b/params_shard_200.bin new file mode 100644 index 0000000000000000000000000000000000000000..5847701ebc278fdd6aacb4f378a0154bf86ff8e7 --- /dev/null +++ b/params_shard_200.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202df2be9e262ae9c61c0fac2c410f13a2093165f9cafe47d099d81c68031003 +size 57802752 diff --git a/params_shard_201.bin b/params_shard_201.bin new file mode 100644 index 0000000000000000000000000000000000000000..e702387387e09b844108af20fe3972c377ddd86e --- /dev/null +++ b/params_shard_201.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce028e915d255231c6e729a021b8b8df2ffbadd77f7fa6e990180736383010da +size 115605504 diff --git a/params_shard_202.bin b/params_shard_202.bin new file mode 100644 index 0000000000000000000000000000000000000000..b3367a91bf8ef9f8c020c4c795a12a4e36107e0c --- /dev/null +++ b/params_shard_202.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51198381b98cd2bedba072bfa0dbbcd7b3e07c30a2f9d7198464c0ae493e925a +size 20697600 diff --git a/params_shard_203.bin b/params_shard_203.bin new file mode 100644 index 0000000000000000000000000000000000000000..b5936d5e7ca13c7639f82837827ba8266824318c --- /dev/null +++ b/params_shard_203.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55d2c8121abe2d2eb515cadfa00a32fbce2395cfe517b87725cdf4eec0403e1e +size 33062912 diff --git a/params_shard_204.bin b/params_shard_204.bin new file mode 100644 index 0000000000000000000000000000000000000000..fce806931adee7d7d7899195bf44badc32cd28be --- /dev/null +++ b/params_shard_204.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89fb51423fefa62fd31488f1f5983d655fe04b7516a9a408233b767dfc670d50 +size 57802752 diff --git a/params_shard_205.bin b/params_shard_205.bin new file mode 100644 index 0000000000000000000000000000000000000000..c142d0789ddd58fd971e965cbb93d4cebbfd9b21 --- /dev/null +++ b/params_shard_205.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35aa8634573410e5d15d4a48b7d0846872c8090b9db9bf6a89ac70a745ce368e +size 115605504 diff --git a/params_shard_206.bin b/params_shard_206.bin new file mode 100644 index 0000000000000000000000000000000000000000..fd67566117bb08b386d268689a5076724de4e6f7 --- /dev/null +++ b/params_shard_206.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06869c06201bbfb09e656ac1b13b1de88237d4f3c9666fb3f76eaa0600feb840 +size 25815552 diff --git a/params_shard_207.bin b/params_shard_207.bin new file mode 100644 index 0000000000000000000000000000000000000000..93f5da91571daf292bc71894dc3f441af58a82e4 --- /dev/null +++ b/params_shard_207.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:683ba0cdb8ca91d60110f1bf6a0d721f7d2939351bd8a4ca8632e08fd4978dda +size 33062912 diff --git a/params_shard_208.bin b/params_shard_208.bin new file mode 100644 index 0000000000000000000000000000000000000000..e860eee2ee2786b33a95369b16aab27c0e24013c --- /dev/null +++ b/params_shard_208.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c4242818f08030dd6ba928ceee4e003e9e368083594ec3d9a02f5b0d7e12988 +size 57802752 diff --git a/params_shard_209.bin b/params_shard_209.bin new file mode 100644 index 0000000000000000000000000000000000000000..b5d2a7d801b9ffc2eb3437748968f22866a16acf --- /dev/null +++ b/params_shard_209.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d91fdf8c80a92db746320b95d3c1fe021323efed567d2ed71591ce5a43ca2dc +size 115605504 diff --git a/params_shard_21.bin b/params_shard_21.bin new file mode 100644 index 0000000000000000000000000000000000000000..6dc03018540c9f8a2ef6d55d739bff21e9f521e3 --- /dev/null +++ b/params_shard_21.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e3fabccd0e8fbcfc9dd96eecc107a8e0f2a04199df99a1f074af62d4fe0e7d4 +size 57802752 diff --git a/params_shard_210.bin b/params_shard_210.bin new file mode 100644 index 0000000000000000000000000000000000000000..a3f8d9f31abc692228bef5bf9ecfe587417f289c --- /dev/null +++ b/params_shard_210.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbb9002137758fccb8617c981f0c5b0a7afb769c8064b82d4fb021ed606032ce +size 25815552 diff --git a/params_shard_211.bin b/params_shard_211.bin new file mode 100644 index 0000000000000000000000000000000000000000..be39af512ab268b94aa63d6221ff2610c94b1974 --- /dev/null +++ b/params_shard_211.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aad58937464bdb7322be0916d6db8bb1d678e860860274e5ecb725544634e9a5 +size 33062912 diff --git a/params_shard_212.bin b/params_shard_212.bin new file mode 100644 index 0000000000000000000000000000000000000000..9653c07eeae0d4b3b3ff295cf4ef4eb61604f5fc --- /dev/null +++ b/params_shard_212.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:177e2ae281b8534009b4cbdc4b46865550058b2a5911c16cf8635a08e6680fe1 +size 57802752 diff --git a/params_shard_213.bin b/params_shard_213.bin new file mode 100644 index 0000000000000000000000000000000000000000..26d28b6a01a6bdd20b3a4f9a3f91e37e3391331b --- /dev/null +++ b/params_shard_213.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67133f1beda165b739c2abeb0ce713f81ae22dcc1fc363d573fb2160b62ba692 +size 115605504 diff --git a/params_shard_214.bin b/params_shard_214.bin new file mode 100644 index 0000000000000000000000000000000000000000..6d6f53ef3359f6aa2e7adf5c88348e1d9dce9578 --- /dev/null +++ b/params_shard_214.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1874f724f72e108a678615f8aba5b59a400d28bf42a721f7f4fe11896079bdc6 +size 25815552 diff --git a/params_shard_215.bin b/params_shard_215.bin new file mode 100644 index 0000000000000000000000000000000000000000..c60947b7e43c82b639c25ed296f5f2a3aa49289e --- /dev/null +++ b/params_shard_215.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85355bea981f0da1c59e78b2b2dff07cbd736fc4f58103a3dd448f4e7c15555a +size 33062912 diff --git a/params_shard_216.bin b/params_shard_216.bin new file mode 100644 index 0000000000000000000000000000000000000000..0e7401ad57310d5c4c995042648568f99c87fbcf --- /dev/null +++ b/params_shard_216.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:204a754e1b384cb796f39ff7ef12d28a9c6f03804278da6263c12a595ce8bc90 +size 57802752 diff --git a/params_shard_217.bin b/params_shard_217.bin new file mode 100644 index 0000000000000000000000000000000000000000..5fb0cefe2bbaa5d8468be67826816493f55743b8 --- /dev/null +++ b/params_shard_217.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c76c87e186c2e20bfbc0b9e449328c312cbc53b0e6984f3ec2968b99b7b2ac1e +size 115605504 diff --git a/params_shard_218.bin b/params_shard_218.bin new file mode 100644 index 0000000000000000000000000000000000000000..fd35ba245070db1ff2ce6d708a3c4131503729a9 --- /dev/null +++ b/params_shard_218.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26d2286673c498b47e248e7837d85793406442d3955b3cbe95ff8d9e8424f154 +size 25815552 diff --git a/params_shard_219.bin b/params_shard_219.bin new file mode 100644 index 0000000000000000000000000000000000000000..0d35e449b446343f9c21e5ca32ef47e34a1490e6 --- /dev/null +++ b/params_shard_219.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b889ed2d5610bea2c3979b7e881bb363c251cef27896f295792c423debcd3230 +size 33062912 diff --git a/params_shard_22.bin b/params_shard_22.bin new file mode 100644 index 0000000000000000000000000000000000000000..f52e5bb74ab2ad7d5cdc74ff6ee769d46fb7246e --- /dev/null +++ b/params_shard_22.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1ac31a1d84f400e86276aafeaee1ea17454e31f9a0958648833d72785cc51f6 +size 115605504 diff --git a/params_shard_220.bin b/params_shard_220.bin new file mode 100644 index 0000000000000000000000000000000000000000..9597e9b3b553b2b2a48786ece27556a08c0e6eeb --- /dev/null +++ b/params_shard_220.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5d4cceaf8aff25d841982a40e1f7081fcb77d85b6c8b30db906d1487287caf8 +size 115605504 diff --git a/params_shard_221.bin b/params_shard_221.bin new file mode 100644 index 0000000000000000000000000000000000000000..8a1579d078c5a5d815167d4e081f94d41070fb88 --- /dev/null +++ b/params_shard_221.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b83faa15764074963f06f8150a93a2f2e4ce4d00301e1e5f87fa602de214b322 +size 33030400 diff --git a/params_shard_222.bin b/params_shard_222.bin new file mode 100644 index 0000000000000000000000000000000000000000..5e85ffbd597a9be47048ee0a54193b5de2fb0bc6 --- /dev/null +++ b/params_shard_222.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc960bc8935577cd11a033d6123bf44afddcb4080eaab01b0e56f9c749942f3a +size 30966016 diff --git a/params_shard_223.bin b/params_shard_223.bin new file mode 100644 index 0000000000000000000000000000000000000000..104ad25cd5acb264ce775944429fb3a5a7f959cb --- /dev/null +++ b/params_shard_223.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b40064f816736b50a1455ca2ccfb5c6c25c3c435b299ffacb243aa64ffb1d04 +size 57802752 diff --git a/params_shard_224.bin b/params_shard_224.bin new file mode 100644 index 0000000000000000000000000000000000000000..93e72b23a820047d159af908867f6b32155915f1 --- /dev/null +++ b/params_shard_224.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:639cddd59e6116fb624bc4a6551da43aca7c130a2a58bbee12885b864a0127d8 +size 57802752 diff --git a/params_shard_225.bin b/params_shard_225.bin new file mode 100644 index 0000000000000000000000000000000000000000..2bd32e2abf3c4e969a9b40f4047c0b74fa201e6e --- /dev/null +++ b/params_shard_225.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:327078af4faba3d333b32f5fdb6e0aede3d82acf5a10185e1741ed403e15e8f7 +size 115605504 diff --git a/params_shard_226.bin b/params_shard_226.bin new file mode 100644 index 0000000000000000000000000000000000000000..108fe9c4a94bb95506ba2f0f22d88f8270248fcf --- /dev/null +++ b/params_shard_226.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a1b09f26b2c20b765235ba18d9eb85d838e4cd2791df7b4568aebcc57a7728a +size 20697600 diff --git a/params_shard_227.bin b/params_shard_227.bin new file mode 100644 index 0000000000000000000000000000000000000000..092ad96049a6ffeeaa83ce0227e19627540c5385 --- /dev/null +++ b/params_shard_227.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1832478bf6fdf336df221fcedf77e5ab458b3411976f8f279adc4f7f3ecd46af +size 33062912 diff --git a/params_shard_228.bin b/params_shard_228.bin new file mode 100644 index 0000000000000000000000000000000000000000..ad1995791b78b65695949e198b45df9ef6909364 --- /dev/null +++ b/params_shard_228.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:146f1278c3c79641e52a6d38333babda4400caab9da490965d16448734e4a949 +size 57802752 diff --git a/params_shard_229.bin b/params_shard_229.bin new file mode 100644 index 0000000000000000000000000000000000000000..7737ce2280e87a1ee7e4d4b2c963aa26d2a67869 --- /dev/null +++ b/params_shard_229.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4545c4844e085ab9bc316e2e131476b09fed8baba61abd44a9f93b8ad3655683 +size 115605504 diff --git a/params_shard_23.bin b/params_shard_23.bin new file mode 100644 index 0000000000000000000000000000000000000000..63858ec57c6e1cf4f25fe948a228013b254db475 --- /dev/null +++ b/params_shard_23.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5dce41be2e9d7bab7d43908d1bb7f2200863cebc408d4b264d74cf40574d967 +size 25815552 diff --git a/params_shard_230.bin b/params_shard_230.bin new file mode 100644 index 0000000000000000000000000000000000000000..332982e2e28802827663496a71f59e302e0c9ae4 --- /dev/null +++ b/params_shard_230.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca8e4cb3fa1b7e6e8e3e76148ec9cf7e18277f58a767f3c63061ebfff29c92d1 +size 25815552 diff --git a/params_shard_231.bin b/params_shard_231.bin new file mode 100644 index 0000000000000000000000000000000000000000..2732dad0526f380c8c442b8f35f14b100340c68d --- /dev/null +++ b/params_shard_231.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a67efe961c69d627882757b13552ff3a2a59ae7de894e262d4081f1ed6b4456e +size 33062912 diff --git a/params_shard_232.bin b/params_shard_232.bin new file mode 100644 index 0000000000000000000000000000000000000000..f23e5fe21111c582ed14c347a062bb5ba9b2f013 --- /dev/null +++ b/params_shard_232.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b35e519d36162dc28f7c91a24dd2bbfe173074cc331372854f541cee636c788 +size 57802752 diff --git a/params_shard_233.bin b/params_shard_233.bin new file mode 100644 index 0000000000000000000000000000000000000000..b94c032d279ad3e610b59c9fab34624af44e027a --- /dev/null +++ b/params_shard_233.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:081748dbb8aa268ac8ca60d9fff72f2d74fddb3671b297587b3634dfac3b7ba5 +size 115605504 diff --git a/params_shard_234.bin b/params_shard_234.bin new file mode 100644 index 0000000000000000000000000000000000000000..db4c93a53b6f3a9ae56bccb95f15c6f790d9e455 --- /dev/null +++ b/params_shard_234.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f954cf47bc48480f4e6512bdc8edcba443eff8054606b3dd98d467e9c7f629c +size 25815552 diff --git a/params_shard_235.bin b/params_shard_235.bin new file mode 100644 index 0000000000000000000000000000000000000000..e70ad747841b4bb0f1601231ad1596dd7a32ac7e --- /dev/null +++ b/params_shard_235.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d40a09b7a896a7162359a550776af8613a27950e908fe691e5c9924a942a9d7 +size 33062912 diff --git a/params_shard_236.bin b/params_shard_236.bin new file mode 100644 index 0000000000000000000000000000000000000000..03ecdfb238f2251fc6186525a5350d61d928d11e --- /dev/null +++ b/params_shard_236.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c632dd74c9a560a1cd4b70142f796e40ea9ab9eb008662622161541ecd444643 +size 57802752 diff --git a/params_shard_237.bin b/params_shard_237.bin new file mode 100644 index 0000000000000000000000000000000000000000..49b8d5b799179b809a66a632d3c16bbd0f6ee6dd --- /dev/null +++ b/params_shard_237.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3390027d830d0d7d393be627366321a06bba06e61f5778bde8145543424b9e1 +size 115605504 diff --git a/params_shard_238.bin b/params_shard_238.bin new file mode 100644 index 0000000000000000000000000000000000000000..9284efa6768164778beda658f94adcfd0b10e5b9 --- /dev/null +++ b/params_shard_238.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8386de4e9ab55df1a995e859e569b02fc4c0d000b63c3258e6a24e82dd25eda9 +size 25815552 diff --git a/params_shard_239.bin b/params_shard_239.bin new file mode 100644 index 0000000000000000000000000000000000000000..e6be82e2a13e10a570587bb879ba125867db6adf --- /dev/null +++ b/params_shard_239.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8277c912a5d66a984f714edf3361faacfb264b81d5c0d88d396fd78ec50b34b5 +size 33062912 diff --git a/params_shard_24.bin b/params_shard_24.bin new file mode 100644 index 0000000000000000000000000000000000000000..9e7c144ec09462c81bd693457291e9aecbe1eab1 --- /dev/null +++ b/params_shard_24.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24b86d4af9f317dc946d4b276027f6fd79dc1f4cc101edd851506a3b1e5c8b9e +size 33062912 diff --git a/params_shard_240.bin b/params_shard_240.bin new file mode 100644 index 0000000000000000000000000000000000000000..b532edabe14d80e1542c1f4a586cb902982e9fb7 --- /dev/null +++ b/params_shard_240.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43d8c3bbc1eb35dc26a7e74fdfeb6034153bf7dd7d97e43b474fadfd80059158 +size 57802752 diff --git a/params_shard_241.bin b/params_shard_241.bin new file mode 100644 index 0000000000000000000000000000000000000000..87979f4694aaeec746cc186012e5f844c74ce709 --- /dev/null +++ b/params_shard_241.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3166b76479e1b9fecea88b4d6e3c5202f63717c1300654aa2526720b4fae0ab +size 115605504 diff --git a/params_shard_242.bin b/params_shard_242.bin new file mode 100644 index 0000000000000000000000000000000000000000..98142ab82cc25238f03661e3cf51bf13fce8251e --- /dev/null +++ b/params_shard_242.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a690cda34b8abe75c73ffc4afa7507f7baabad4ccf64447cd02e50f51ab8b62c +size 25815552 diff --git a/params_shard_243.bin b/params_shard_243.bin new file mode 100644 index 0000000000000000000000000000000000000000..a2ad92c604559a135983e2306f3c419c8b0ff759 --- /dev/null +++ b/params_shard_243.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c479cb20b78e4cc4ef2ed01ed69eae61beea355047db41dae6abf6699ae765f +size 33062912 diff --git a/params_shard_244.bin b/params_shard_244.bin new file mode 100644 index 0000000000000000000000000000000000000000..1a832504a6de5f1335d77a76d4cd5a4109cb4744 --- /dev/null +++ b/params_shard_244.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a81cf52ade2d08ae914e4852fe9990fa72f757773c1670aa3736ef1a33e17b00 +size 115605504 diff --git a/params_shard_245.bin b/params_shard_245.bin new file mode 100644 index 0000000000000000000000000000000000000000..558e3012a4fc43a7d9c81b35b1a531e0bd840ed0 --- /dev/null +++ b/params_shard_245.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e067d6d1207b55a7ef5c13746b8ec7d588901b591222bc400de0b159d4cb77f9 +size 33030400 diff --git a/params_shard_246.bin b/params_shard_246.bin new file mode 100644 index 0000000000000000000000000000000000000000..0a38a5c401cad28cdd472769c983d0c47a638ac4 --- /dev/null +++ b/params_shard_246.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1ff6625d6325ac42a858f2422b1380ed26bede34120206d23ebfe79ce1c20b2 +size 30966016 diff --git a/params_shard_247.bin b/params_shard_247.bin new file mode 100644 index 0000000000000000000000000000000000000000..26fdd30cf0e00b6a59d1b52e807b050a19fb5fce --- /dev/null +++ b/params_shard_247.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0608b7ef6434270917570d6e11981055944267f545f0caa2312c9f34099a61f1 +size 57802752 diff --git a/params_shard_248.bin b/params_shard_248.bin new file mode 100644 index 0000000000000000000000000000000000000000..1db348693bb7de3c4783affaf885410ef2d26ba9 --- /dev/null +++ b/params_shard_248.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49cee0fbc2510fbf9ef9efd1e8e44ed56eb05d830853762d472ec1eafc1d00d5 +size 13472256 diff --git a/params_shard_25.bin b/params_shard_25.bin new file mode 100644 index 0000000000000000000000000000000000000000..9410d3c21384b8e66232be1df74c807d5972b962 --- /dev/null +++ b/params_shard_25.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfd979fa9758c92a4303930432e564eb297810b2514b069ebea01589c7243973 +size 57802752 diff --git a/params_shard_26.bin b/params_shard_26.bin new file mode 100644 index 0000000000000000000000000000000000000000..f36f480da9627c83548963389860fcf90ebb7c33 --- /dev/null +++ b/params_shard_26.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63a2b6be1a0c090f08827ee5aee84a8455f32a3af26e64838db7127630f8eccf +size 115605504 diff --git a/params_shard_27.bin b/params_shard_27.bin new file mode 100644 index 0000000000000000000000000000000000000000..fe51da3b70155df6d8f5538cbb204fa80a6b836d --- /dev/null +++ b/params_shard_27.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b08e28549ca1367007476bff9446c66b487bc0491c8c8bba7fa20a9871f02d2c +size 25815552 diff --git a/params_shard_28.bin b/params_shard_28.bin new file mode 100644 index 0000000000000000000000000000000000000000..93bbae484435f173ed393b8e45708c77ac850d14 --- /dev/null +++ b/params_shard_28.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb7154409e5900d16b281484e61fc527350e2e97200e25fc746dc0b5150b7c11 +size 33062912 diff --git a/params_shard_29.bin b/params_shard_29.bin new file mode 100644 index 0000000000000000000000000000000000000000..0a7bd531f56664916be63e12b9d89c2617d49c3d --- /dev/null +++ b/params_shard_29.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85c06498a4aa014f862f6e6b3006cdfd087a6126cdbb536b2c8082fbd635e938 +size 115605504 diff --git a/params_shard_3.bin b/params_shard_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..1c3cd286e9c3755f020e7e892064db61a4c260b1 --- /dev/null +++ b/params_shard_3.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abe46b599c04248cbd545d18351745a3b38a77b273eebbd4624a8681b567fe12 +size 115605504 diff --git a/params_shard_30.bin b/params_shard_30.bin new file mode 100644 index 0000000000000000000000000000000000000000..1d6e8fd78a6fefa478b64a4deecdee9576bea3e9 --- /dev/null +++ b/params_shard_30.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b3fa50d93ad596fd03dae9b2609e265a7239a3fef10987345fd6b4ad4841b70 +size 33030400 diff --git a/params_shard_31.bin b/params_shard_31.bin new file mode 100644 index 0000000000000000000000000000000000000000..6d94b90ce812900f48c3f865536f6d2d0a9c889d --- /dev/null +++ b/params_shard_31.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14b8f7225571ec2e31c23670d0fc80781f1dcec6f088213d32e4f2597e36bcf9 +size 30966016 diff --git a/params_shard_32.bin b/params_shard_32.bin new file mode 100644 index 0000000000000000000000000000000000000000..faf3944e7541cfbfa11850162ffac9abaf1ad068 --- /dev/null +++ b/params_shard_32.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cb7d5890a6df3ec8ccb238cd6047efd8de5bd5958d32cbfba1443546165a48b +size 57802752 diff --git a/params_shard_33.bin b/params_shard_33.bin new file mode 100644 index 0000000000000000000000000000000000000000..5ff18675f879365598391f05fa7a2ac272c6b89f --- /dev/null +++ b/params_shard_33.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaa89dfcafa6f30bd335293b80d70e0a521705f79bd796ad1c016d967bcb08ab +size 115605504 diff --git a/params_shard_34.bin b/params_shard_34.bin new file mode 100644 index 0000000000000000000000000000000000000000..f90ec9e923eb58495321564c4eac1bfd47ea3afa --- /dev/null +++ b/params_shard_34.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7401ddffb86eaf17cf5de0f5b4f3b19bdc09c30f0f5193a5373557619ad70d20 +size 33417472 diff --git a/params_shard_35.bin b/params_shard_35.bin new file mode 100644 index 0000000000000000000000000000000000000000..75e0955021f869ed560f7aea9e1a667634dc0988 --- /dev/null +++ b/params_shard_35.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b60c051fed4b3302185ba7bb4eb041845ca5689360b9c5938a7a129f62a485d +size 57802752 diff --git a/params_shard_36.bin b/params_shard_36.bin new file mode 100644 index 0000000000000000000000000000000000000000..7148d22e6122dfa644b713c38757a2393a382bf3 --- /dev/null +++ b/params_shard_36.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c77c6e4f8a3deb783defd7b53ca3ab521820d29c82069666e82a3cf4381add29 +size 31664896 diff --git a/params_shard_37.bin b/params_shard_37.bin new file mode 100644 index 0000000000000000000000000000000000000000..0329147ba40d88b87f3df90d69c23f9b163aaa4d --- /dev/null +++ b/params_shard_37.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44a18a4bb161ae8ecbae2ba0fc6b732857f9c8b416fad3ab387d0362f2f86d91 +size 115605504 diff --git a/params_shard_38.bin b/params_shard_38.bin new file mode 100644 index 0000000000000000000000000000000000000000..daf7a0084b17dceec0dee268830c926d562157cd --- /dev/null +++ b/params_shard_38.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a4dd70312c438af2d6f005876b940aad89f3373f247b557d797a94fc4255e11 +size 27901696 diff --git a/params_shard_39.bin b/params_shard_39.bin new file mode 100644 index 0000000000000000000000000000000000000000..1e99312ac88d9910d9ef7a86e2627aea692b8494 --- /dev/null +++ b/params_shard_39.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b96690a1a6ec50d5fd02bacc39f24366c83555fd2c0c4222d82b7eb999b32d3 +size 57802752 diff --git a/params_shard_4.bin b/params_shard_4.bin new file mode 100644 index 0000000000000000000000000000000000000000..b5454bb61d314ae5ebb78f7f251fee723c243ca3 --- /dev/null +++ b/params_shard_4.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2f73548a40c9e0a29cd8344f599cf47c87011f6b4a2d9d36e33b842d8809229 +size 27912448 diff --git a/params_shard_40.bin b/params_shard_40.bin new file mode 100644 index 0000000000000000000000000000000000000000..7a4d1ffc8d3ad6bb6faa52f33a084a88bbe70ab5 --- /dev/null +++ b/params_shard_40.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4691ae7d1c6f0b430abef5148ef84d298dd0cb47bf46ea26f98cd50c01f91870 +size 30976768 diff --git a/params_shard_41.bin b/params_shard_41.bin new file mode 100644 index 0000000000000000000000000000000000000000..7deec9681b2089899e753ede76f46f08097b7f45 --- /dev/null +++ b/params_shard_41.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4be75b3651cfb9560264b9eb9cfb7df4a9e6568a6bf64ac0300bb1d45fcb986 +size 115605504 diff --git a/params_shard_42.bin b/params_shard_42.bin new file mode 100644 index 0000000000000000000000000000000000000000..3aa9930a8587a2eb7cdd901e44c6fba8c5e80d95 --- /dev/null +++ b/params_shard_42.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdf4b2148bc81413324ce206e6ba6b6b9efbd94dbc3d26b2977ae30ee39106bb +size 27901696 diff --git a/params_shard_43.bin b/params_shard_43.bin new file mode 100644 index 0000000000000000000000000000000000000000..b6da0b04b4c572837030598679b78d36baed2b24 --- /dev/null +++ b/params_shard_43.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:211705eb818b62888310368d2a95ace0ab16df01cf10e8c886e795579b724145 +size 115605504 diff --git a/params_shard_44.bin b/params_shard_44.bin new file mode 100644 index 0000000000000000000000000000000000000000..6c0f91156b037d67951611420c80554584af88ab --- /dev/null +++ b/params_shard_44.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d9976d886bc3bfad14791c989b7cc4f7d3954248c168031e50e4bc199633063 +size 30966016 diff --git a/params_shard_45.bin b/params_shard_45.bin new file mode 100644 index 0000000000000000000000000000000000000000..ce988affa2dc68b6b733c5d40c8f3ef502530a5c --- /dev/null +++ b/params_shard_45.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa5b1828115eeb1830f43a22db68fe73cc0b6f61403dbb213e3cd01c36a05177 +size 33030656 diff --git a/params_shard_46.bin b/params_shard_46.bin new file mode 100644 index 0000000000000000000000000000000000000000..13c49956cda2dc0753e3cf0c0513edc12371baba --- /dev/null +++ b/params_shard_46.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af5f4232bc480883f9d2fcc4ff8ffc83732c44529e60a7376db8998a554c44cc +size 57802752 diff --git a/params_shard_47.bin b/params_shard_47.bin new file mode 100644 index 0000000000000000000000000000000000000000..113cd7071a913bf6b8d36386838f46d53308818f --- /dev/null +++ b/params_shard_47.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b06a780f8862cbd6c78d43ab7aeb550151f5c96cd2dbd6a9ac7326e31eb66274 +size 57802752 diff --git a/params_shard_48.bin b/params_shard_48.bin new file mode 100644 index 0000000000000000000000000000000000000000..7e4e0da31baa209d753029d97889de464f521a93 --- /dev/null +++ b/params_shard_48.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cd33708c8cd5332046113ab3657eeba17720c9bcd33fd94651da9ad26e672f7 +size 115605504 diff --git a/params_shard_49.bin b/params_shard_49.bin new file mode 100644 index 0000000000000000000000000000000000000000..d0f353e1240759d1fb7e7fbd870b63a9f57013b4 --- /dev/null +++ b/params_shard_49.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e25f3b01899dbdf6b5b45582291fb7e08b95b926fe95550cb131997cdaf7cc4 +size 33083904 diff --git a/params_shard_5.bin b/params_shard_5.bin new file mode 100644 index 0000000000000000000000000000000000000000..94ab3d140a564f68fcdf5294ebb039ca17196e3a --- /dev/null +++ b/params_shard_5.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08fbb5969a9d48e99f28164ca805cf5e267d64dd7e7f9a8335ed3599b062a696 +size 115605504 diff --git a/params_shard_50.bin b/params_shard_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..e01bf2f448da7387fe3a50759c96f66d9cb33060 --- /dev/null +++ b/params_shard_50.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b530cc7c4e3f5f4bcb5e2f9352ee62b9a8c5a4ea3bd27b37c57ff97ad5c28a3 +size 33062912 diff --git a/params_shard_51.bin b/params_shard_51.bin new file mode 100644 index 0000000000000000000000000000000000000000..102c32cb436909bd77fbcd850781070052debdfe --- /dev/null +++ b/params_shard_51.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b9c6ea85caa2035a09644f20705b563b32259bc078b245bfa5df8ef7570fa99 +size 57802752 diff --git a/params_shard_52.bin b/params_shard_52.bin new file mode 100644 index 0000000000000000000000000000000000000000..836df924fccb39c67719b1e3645e1f668d3853c3 --- /dev/null +++ b/params_shard_52.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b0615efb8dcf768d0189a3db96caecfc9fa72d3116b945212cc83ca46bc6915 +size 115605504 diff --git a/params_shard_53.bin b/params_shard_53.bin new file mode 100644 index 0000000000000000000000000000000000000000..532b70ef114cf11ea5de637874dc5eb7be933212 --- /dev/null +++ b/params_shard_53.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c222d6816ec90c1d4ec578958c57b30630fd4ce88cfc502ce8deb8b083651361 +size 25815552 diff --git a/params_shard_54.bin b/params_shard_54.bin new file mode 100644 index 0000000000000000000000000000000000000000..95d93f040163d92f599616f1319677d6aac02047 --- /dev/null +++ b/params_shard_54.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f28f8815de73cd77b04a3684caa870a0fdbb4f01db355da39b348377fd4a4a06 +size 33062912 diff --git a/params_shard_55.bin b/params_shard_55.bin new file mode 100644 index 0000000000000000000000000000000000000000..b80e3f59bee48bab821b94c7aaa8a94d961e481c --- /dev/null +++ b/params_shard_55.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb5966cd54ad441e1bb391b62d77bf3135706f38fa157030ffc748a7926a56ae +size 57802752 diff --git a/params_shard_56.bin b/params_shard_56.bin new file mode 100644 index 0000000000000000000000000000000000000000..442612d9184b0e31111df80642c0e2ccd2c24e1e --- /dev/null +++ b/params_shard_56.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5e26cd811d6289690b7a7eb9257fc73bd596ff386fdbc562f2879add5803e4e +size 57802752 diff --git a/params_shard_57.bin b/params_shard_57.bin new file mode 100644 index 0000000000000000000000000000000000000000..f101a22a81c716d53b4f3ffc3cb3793472f88295 --- /dev/null +++ b/params_shard_57.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ec6aec6055cdd3df3970841acbee61fc76bee36568f1527406b1705e9d2277e +size 115605504 diff --git a/params_shard_58.bin b/params_shard_58.bin new file mode 100644 index 0000000000000000000000000000000000000000..fde55943e6b46aa1ba0bb3b45f8214eb53da7f57 --- /dev/null +++ b/params_shard_58.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5c8439e8c15a4e5df68cbdab5af5fb23a924b2af9d05b41e569c88c6ba6ddd1 +size 33083904 diff --git a/params_shard_59.bin b/params_shard_59.bin new file mode 100644 index 0000000000000000000000000000000000000000..a01457002322145324351a7eaa0986cd8936b324 --- /dev/null +++ b/params_shard_59.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3899855d76c7900d8ba557f2d45ef4e732ccb6c056141f79aa14528fc190224a +size 33062912 diff --git a/params_shard_6.bin b/params_shard_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..18fbe004ea823bcaeb107a7c68b84bfebddbd818 --- /dev/null +++ b/params_shard_6.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8db7eeedd10825a5db8e999ee46b786dbb0db74a9a7ad8a16109ed7c8e40b179 +size 30966016 diff --git a/params_shard_60.bin b/params_shard_60.bin new file mode 100644 index 0000000000000000000000000000000000000000..9451696be76e55c0e35097aa7509d3526833a733 --- /dev/null +++ b/params_shard_60.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287e4ff8663e112c679eb9ae69eda11a25b62933e31e643bd32032afc014e189 +size 57802752 diff --git a/params_shard_61.bin b/params_shard_61.bin new file mode 100644 index 0000000000000000000000000000000000000000..f219073cf723d6b65b3cb920022d67aa7e4a2d7a --- /dev/null +++ b/params_shard_61.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cebb706b8dce3e1eff67a841e92dfcf89ff418dce9d90e805af8f9caedf1af43 +size 115605504 diff --git a/params_shard_62.bin b/params_shard_62.bin new file mode 100644 index 0000000000000000000000000000000000000000..7c890208e14230ee7a4e8ddc1563d6e89af1c68b --- /dev/null +++ b/params_shard_62.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd70ce302d52adde2651df2d1e1a696ff3554af549d0b01f58d73846a6b37a1c +size 25815552 diff --git a/params_shard_63.bin b/params_shard_63.bin new file mode 100644 index 0000000000000000000000000000000000000000..3709f68fe6dff81429ea1a9adb1a6581e9a2d50e --- /dev/null +++ b/params_shard_63.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d27b8622e9f77ba01d0e75e86c7e41d38531557b51f8a6c7f325f3861c4460d7 +size 33062912 diff --git a/params_shard_64.bin b/params_shard_64.bin new file mode 100644 index 0000000000000000000000000000000000000000..d2b5d19bf7155d522ecd1f383b44fa8092f7b97b --- /dev/null +++ b/params_shard_64.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abe94c55bc79d99e8f48db3d05177af8aaa6810e974d6d7b993e9341f9c65cca +size 57802752 diff --git a/params_shard_65.bin b/params_shard_65.bin new file mode 100644 index 0000000000000000000000000000000000000000..e0e1ff934fdb40ea5af3696d51ce36f489a023e3 --- /dev/null +++ b/params_shard_65.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d92bda2fcdf7cfc449828d6ef76703a638e85ee2fa4851727f9a062b9f6a5919 +size 115605504 diff --git a/params_shard_66.bin b/params_shard_66.bin new file mode 100644 index 0000000000000000000000000000000000000000..82c9d72ff515f43e651361e5e9038afb27351ddb --- /dev/null +++ b/params_shard_66.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:759ad06b11c689822539eab1662ffe7395e7458cd562b86e4b5ea57d390da155 +size 25815552 diff --git a/params_shard_67.bin b/params_shard_67.bin new file mode 100644 index 0000000000000000000000000000000000000000..24877eb3aa718fb40597b0bb3d515a7546362a83 --- /dev/null +++ b/params_shard_67.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99109eb1b19dc04c140793009796488b7cd354d81e9bd975c150dc809945eda6 +size 33062912 diff --git a/params_shard_68.bin b/params_shard_68.bin new file mode 100644 index 0000000000000000000000000000000000000000..97d1e7cfe42d598d5188e2d8fd28d5118582912a --- /dev/null +++ b/params_shard_68.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7047df4052000fa0bad711d52e59969a5f8d248f700937e9109f1b66b42aeef8 +size 57802752 diff --git a/params_shard_69.bin b/params_shard_69.bin new file mode 100644 index 0000000000000000000000000000000000000000..ed28c58a31a2aae1abb93d633722252ff9385801 --- /dev/null +++ b/params_shard_69.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca5a38f71b634ec315b6996465ddb91046b1466c285eb6fe837abbfe8dee312e +size 115605504 diff --git a/params_shard_7.bin b/params_shard_7.bin new file mode 100644 index 0000000000000000000000000000000000000000..0af7574b3685ede164d6515b6a7346655f8c513a --- /dev/null +++ b/params_shard_7.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1b76c9862917f55f485bc7b3ea687522e6bbfc335575a66b222c7ae4a6f519f +size 33030656 diff --git a/params_shard_70.bin b/params_shard_70.bin new file mode 100644 index 0000000000000000000000000000000000000000..09ced54dff34b4a2f48f63604743e0fe17e41326 --- /dev/null +++ b/params_shard_70.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05a26c5b133133f792c15200fb87b3c0bb5a4984813d5f01bc2a93bd2ae50ad0 +size 25815552 diff --git a/params_shard_71.bin b/params_shard_71.bin new file mode 100644 index 0000000000000000000000000000000000000000..de67546e9b427415988507d677a195417bfc5767 --- /dev/null +++ b/params_shard_71.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4908c309c6c448ef09ac6dfdb04f1b5a5cd22a118b93ce1220d9ff8e7f57c021 +size 33062912 diff --git a/params_shard_72.bin b/params_shard_72.bin new file mode 100644 index 0000000000000000000000000000000000000000..38170b393a4ad5ca28f92df28ea5d01ce90fc4cf --- /dev/null +++ b/params_shard_72.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92fe05e4e27880cc29c217331a900fbdda743f3eaa4940d447df6924a8f185b9 +size 57802752 diff --git a/params_shard_73.bin b/params_shard_73.bin new file mode 100644 index 0000000000000000000000000000000000000000..8b15aeaa5d230236e0c2d59e80721dae2872ac4c --- /dev/null +++ b/params_shard_73.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0035818e0e41ca06054e13b55ea2b776bcf60855aaf3a0eefece183a804a1e14 +size 115605504 diff --git a/params_shard_74.bin b/params_shard_74.bin new file mode 100644 index 0000000000000000000000000000000000000000..f383ff92bbc3397848d91ec63fea9a95c2217a26 --- /dev/null +++ b/params_shard_74.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5270d0aa35128cc671a05f73f8f880d0f7a4137e9e69dde88ed6c2b90a13ffe8 +size 25815552 diff --git a/params_shard_75.bin b/params_shard_75.bin new file mode 100644 index 0000000000000000000000000000000000000000..f84f3736ce9190bcacc78dbcc671e0cbfa047ebf --- /dev/null +++ b/params_shard_75.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a52b84f567079a692cd7a8576e4ffb8b7d1b20744001acba80c627f94dcd98b +size 33062912 diff --git a/params_shard_76.bin b/params_shard_76.bin new file mode 100644 index 0000000000000000000000000000000000000000..73caaefecb5b16ec7703f30144756441121fa700 --- /dev/null +++ b/params_shard_76.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8df7bc2910752602d4c54f378b3265cf4d6d1a0f64311d5c998e2f5e1f5cc898 +size 115605504 diff --git a/params_shard_77.bin b/params_shard_77.bin new file mode 100644 index 0000000000000000000000000000000000000000..8819bc0b1211d704ca840cbd975b2d5bbcaa6091 --- /dev/null +++ b/params_shard_77.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00764cb06807ad21f975ae8bc42d1187d25ca0aa0c91828d72a5a153a8d25ab9 +size 33030400 diff --git a/params_shard_78.bin b/params_shard_78.bin new file mode 100644 index 0000000000000000000000000000000000000000..f49ca8756b909211951d373f651c3eff1313f5a3 --- /dev/null +++ b/params_shard_78.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d27baad1efb09ebf78479cd8c9d6bd1fcf5390871f888234bfa8a998ea57469 +size 30966016 diff --git a/params_shard_79.bin b/params_shard_79.bin new file mode 100644 index 0000000000000000000000000000000000000000..cc9fe3c9315c665321ad717e287d596b4567a424 --- /dev/null +++ b/params_shard_79.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a3300b941ee4bd942ce46e2bfca4ed7601f72b94ad156ca6dcc4b13db7a5b17 +size 57802752 diff --git a/params_shard_8.bin b/params_shard_8.bin new file mode 100644 index 0000000000000000000000000000000000000000..aa1db127aa7fd36af85d0d07b072f2de5f766a80 --- /dev/null +++ b/params_shard_8.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08e1e926b6b2ee270cc22a5791455f4313e68e174dd75dae12b49ce179b2a289 +size 57802752 diff --git a/params_shard_80.bin b/params_shard_80.bin new file mode 100644 index 0000000000000000000000000000000000000000..3b2b0a136388880ae15c4cd0c08c44d29a6da9fa --- /dev/null +++ b/params_shard_80.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89750cafaf75592d689e2e71f0af55b64b94e55ab7e7da0d20a834bc6dc152ae +size 57802752 diff --git a/params_shard_81.bin b/params_shard_81.bin new file mode 100644 index 0000000000000000000000000000000000000000..80497f1336fb89d7d48209eb2a904f1aeff5b50c --- /dev/null +++ b/params_shard_81.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85705dc44b27ce4ec1d87d2bd7fb794937317ddd73389ee8acc8da78ba8c371a +size 115605504 diff --git a/params_shard_82.bin b/params_shard_82.bin new file mode 100644 index 0000000000000000000000000000000000000000..71dc677faa05a775bc7c9538064076ad8c24cbb8 --- /dev/null +++ b/params_shard_82.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a719d76ed36c0433ce2abb5036f88a1255c9612ca5302f0a0b90b8d98f9bd430 +size 20697600 diff --git a/params_shard_83.bin b/params_shard_83.bin new file mode 100644 index 0000000000000000000000000000000000000000..783d8bca6297f565283d4460ebad72bd36e2bbd8 --- /dev/null +++ b/params_shard_83.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc35105f3fc565419c1cdc36f0dc3ffb623c81834094865f91739f85569e7adc +size 33062912 diff --git a/params_shard_84.bin b/params_shard_84.bin new file mode 100644 index 0000000000000000000000000000000000000000..4f0b6c60f8bc500ea182862916c929cb5fbded5d --- /dev/null +++ b/params_shard_84.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2aed6f8a0104120219fb6e89b484332d1e5dd14851c01d72a4ebcc714cb26aa8 +size 57802752 diff --git a/params_shard_85.bin b/params_shard_85.bin new file mode 100644 index 0000000000000000000000000000000000000000..20a28bcbb78ceae3e79322b0fda941c3fee36076 --- /dev/null +++ b/params_shard_85.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:673bde3e465a701f57c8520620ccf886cc3f6c562884eb20faf60ca78aa34d16 +size 115605504 diff --git a/params_shard_86.bin b/params_shard_86.bin new file mode 100644 index 0000000000000000000000000000000000000000..bf7c4ab9c572a861b4f75afdce4efb405bfcc3e3 --- /dev/null +++ b/params_shard_86.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d55c4bfab334dc6e7839e31f89bf7c021b2d366fc92f81d1263834bf9162ff1 +size 25815552 diff --git a/params_shard_87.bin b/params_shard_87.bin new file mode 100644 index 0000000000000000000000000000000000000000..5473532e825ae71123bf28d9a03627b794d11933 --- /dev/null +++ b/params_shard_87.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9f76c220f81b7c2a0253bb8ac0e725f8bd5df25fe3bad57fcfb4c779ed7d220 +size 33062912 diff --git a/params_shard_88.bin b/params_shard_88.bin new file mode 100644 index 0000000000000000000000000000000000000000..7ee5ccd1b46b5aa37e41ba68da70c15412a5a7ec --- /dev/null +++ b/params_shard_88.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81a6ddca72bc4f0fb4e7b10af46b254f0ceca27d8a6b7e40a04fa322a917470b +size 57802752 diff --git a/params_shard_89.bin b/params_shard_89.bin new file mode 100644 index 0000000000000000000000000000000000000000..aa89b8d2066ffe48806bb2ef8db796f9f0301de9 --- /dev/null +++ b/params_shard_89.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d30edc8e049765c941e854a81d2a56db40bb081da1d976687e6061ab7ff527b +size 115605504 diff --git a/params_shard_9.bin b/params_shard_9.bin new file mode 100644 index 0000000000000000000000000000000000000000..eb92b12ad19c0245f45428239e699f858d3975b2 --- /dev/null +++ b/params_shard_9.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7962781fa87caf79dc4f688f8a79153b2268435f356f8369653b6d5a8724b19 +size 57802752 diff --git a/params_shard_90.bin b/params_shard_90.bin new file mode 100644 index 0000000000000000000000000000000000000000..89a14d8d79b2383802d7e972d4a8e938580b4772 --- /dev/null +++ b/params_shard_90.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dde40e3fcc508cd583a09a76ebfe86f2de01335e4d222328a7cee7bd5502bd7 +size 25815552 diff --git a/params_shard_91.bin b/params_shard_91.bin new file mode 100644 index 0000000000000000000000000000000000000000..803ab7ee7775716bee63df4c5833017fcc211df7 --- /dev/null +++ b/params_shard_91.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdcf387acda495e3237d4ee229acd04f4a7bc98db8507e39c5a6418b3958ced6 +size 33062912 diff --git a/params_shard_92.bin b/params_shard_92.bin new file mode 100644 index 0000000000000000000000000000000000000000..6a007f7f866f7e29eddf361440f8b85a146cc4ca --- /dev/null +++ b/params_shard_92.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd5c987a0b67fc66fd1edcad24b8bc77883aee2bc4b940a08c3af79fa68a0de1 +size 57802752 diff --git a/params_shard_93.bin b/params_shard_93.bin new file mode 100644 index 0000000000000000000000000000000000000000..50e1b51a4a60d80cbb7837ab46218915c3cbb206 --- /dev/null +++ b/params_shard_93.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d181d7bae0dc6adec364f89730467968c44d08e6a53a20574e948c85aba3ac6 +size 115605504 diff --git a/params_shard_94.bin b/params_shard_94.bin new file mode 100644 index 0000000000000000000000000000000000000000..14faf2a640d9d46465e57c81c0a003d49f09ad41 --- /dev/null +++ b/params_shard_94.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:660dd886979724e776892a41020d3d566251c0736a4409092fcabc7547e19acc +size 25815552 diff --git a/params_shard_95.bin b/params_shard_95.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd2ffbff93eda52d0d00e5160abbfd097a61e573 --- /dev/null +++ b/params_shard_95.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d0ce8c642423ca979b9adea31eed2e91d3afffbac42d634447b152b462d9df7 +size 33062912 diff --git a/params_shard_96.bin b/params_shard_96.bin new file mode 100644 index 0000000000000000000000000000000000000000..441f9c71918e7776ae95e7448db2842c637aa3ad --- /dev/null +++ b/params_shard_96.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9fa692abd9942512b2a837e699aef7ddda270377bd63ea72d35ad2a93ef7c6f +size 57802752 diff --git a/params_shard_97.bin b/params_shard_97.bin new file mode 100644 index 0000000000000000000000000000000000000000..83fb90b5e6c50e9134606ccd77353c5790e2c458 --- /dev/null +++ b/params_shard_97.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c519a302854b4b8cc6a8ea86c43766fcbcdbfcb407bcec433307d3e2a6256fd +size 115605504 diff --git a/params_shard_98.bin b/params_shard_98.bin new file mode 100644 index 0000000000000000000000000000000000000000..59f1d63e5425aad05c97f05b93fa6bbe87ce4a14 --- /dev/null +++ b/params_shard_98.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62c3e0d7b31e12b290d776b346a904ea443a09c09a7e1bbc3dda7239bebebf34 +size 25815552 diff --git a/params_shard_99.bin b/params_shard_99.bin new file mode 100644 index 0000000000000000000000000000000000000000..58bedd5760fa9b0d10d83c7e80bc2999b00c1325 --- /dev/null +++ b/params_shard_99.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34be39f1b05280418e67451454b03dc21d5a7d5041b799c3d6287761a0dd7e9b +size 33062912