diff --git "a/ndarray-cache-b16.json" "b/ndarray-cache-b16.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache-b16.json" @@ -0,0 +1,5153 @@ +{ + "metadata": { + "ParamSize": 339, + "ParamBytes": 59080134656.0, + "BitsPerParam": 32.0 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 1557135360, + "records": [ + { + "name": "lm_head.weight", + "shape": [ + 152064, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1557135360, + "byteOffset": 0 + } + ], + "md5sum": "67f34fefaa9266980b9208d5457abd73" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "2e81a1326993b48aa6c0a0e2db255997" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "f8a090408245264d0e1d9f03ce0ff6be" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "6927ad033418e93830fd5c20356649ea" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "a223d1622fd3915fdbad65eafda8a6e4" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.34.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "2afd839858d4e016d04f7c95f9a0d06d" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "2acf32a0f2dc564615582e95870c5862" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "b9c479544978dc779c72e0078a80bf6a" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "1581be5ef3c41b3c4fbb76cb3f95ae4b" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.35.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "9909ec25bdab15a960ca6bbd4cd3ed50" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "debed77fdd005192812cddd56ad44197" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "ffa6ca6d4bfba06596c49a63599affce" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "165a9f21f4a29234dcbc7b34be82f866" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.36.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "63776f932b349670f06408d47d66d036" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "d14c45e08685b4a40ab8836047d2aab4" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "7cf154bdc6fe0bbce71eb5f5d767f78f" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "184ecfb43030d3fc5ce4f3470628ada3" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.37.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "4ae882fabef73c10c79c1e68d827ac4b" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "6367f94a9269a3a2991bf012e3603cdc" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "1a82f1815af7e1f4ab9f8c77ae9b7e6c" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "2c467e3b6fb07cee1be3c015d22a852f" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.38.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "d28a038d9fc572e6b0a605f24e6120b4" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "80aaf33d39229293c809e04b71150b23" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "897c37d2c228b6bd59459f1cd99b9bb3" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "728deefddfbfc58bcc08a0b3510161de" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.39.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "bfe9ce70ce8da8f6338df2c4ae3eee99" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "d1debb0dd2acc1f03735b497a6a91573" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "c6d561a48d673a64fc597528028f2f84" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "ea7a44ff31d1442d4b7bd0b6346093bc" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.40.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "d15be3030f7723b5264bb91cd9c0d7c9" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "49e7aab101255e37daac6f9cce4837ab" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "332ad52c9ed7b5c055399f1bdffeadd8" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "880d1a1951e8567a960c7e038cddf756" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.41.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "ea589c7836f08a74151a80ffdb2d349c" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "e78c7ba76983227a9b7ac071ac694031" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "fc038b0eba762ba3db2584fc52bf7cc7" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "c7e7d3b0df84c2dcb207238a2cfd70eb" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.42.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "9c80d27e2ac65090d1a54f89484e22a6" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "28bb5310245adc625c044b53b878282f" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "b959be6d7954547234d24911c70eb098" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "459b0b5426e93eb1e6ba26d210879cfd" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.43.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "0e41e31d5634a8594305b5018627d9a6" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "277956c76d37ff451e1ecef7d6e90493" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "7223c5b983a96990695b681977510df6" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "4966281230e79130a0c83a675bafffb1" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.44.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "671d3b49254f6b85ed3e552271935982" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "d1a6bd543bc0cf4f4bba354c5476a88a" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "cd00486b8a4f47e97d5fe392ffad2a6d" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "6af77c78f9896c0e7d8348f4bf9fa544" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.45.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "2febc5dc074971711bbf85d85d56b5b9" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "1ae32afe5675d01f31445dc9babe0fe0" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "01e98c4a4ecdac353b870eea59956a98" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "0982278968daf97185b185acb8900903" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.46.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "eb000d32c30d55b468b99312beee53f6" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "71e8f85b01800bdf7f80a066b23a97d5" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "1119de3b05b35ef62f9e4c247c0c2cc0" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "59b82b76dd1085824b305f43b582247b" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.47.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "94983ab1c727ca95ad2f67e5bacc5c1b" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "e2b4a2172aac6c8346cea1a3f1ad1b1b" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 1557135360, + "records": [ + { + "name": "model.embed_tokens.weight", + "shape": [ + 152064, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1557135360, + "byteOffset": 0 + } + ], + "md5sum": "a9975b9929f6cbcce3824841a57fc801" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "f03b6f6ac6635bded7c8a7435516fc77" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "886d85aa71ce9cc3c7370101ec384c08" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.0.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "3a8f857b7764fc2c88c4a513855ba0a2" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "cce4e1f3c7e07799d1afaf1a0a7389a3" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "273b3ffa5a07ff038dddfeb90fc39fe7" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "7b1b6881a6d56055542559e21873e4d7" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "6c648a8c6d5ff5fd90cc81510ffdf83b" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "ede2794b5224b1fcee990912b5be2dcc" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "c33a040026505ee7c414cec291b9bff5" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "9ed3b87e5b5ba665466a1622aec9e48e" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.10.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "b8fdcd658877004f59097d7567bd5fb3" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "9946ffb4a0e789d8d6406f92ea1ffa5d" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "84fbeba33d78da5d5b694301842ac512" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "084a1cfe4fa967fe8c157ad30fa8915f" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.11.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "1f818d16db5900bf8bdc8aa3ed228c34" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "e91cdef9c7f1d6e403393870246fd9ba" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "e5679aa30a4aabe293fbd55613167b7c" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "89131ba53a7e1fc78aa7370413f9d596" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "0f6aad1bceee17cec2d95b29e90aa9ce" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "f8b4b3c2ff58b247af8a689abffa8204" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "6421c12c41f29f5eee4f641dad330f2f" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "e6135a78bc43599282afd1bafed6b2a3" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.13.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "36122d3b0cd6c5a92df92238cce1442f" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "391a50ec8a34aa6fabf8bb257fbbf36f" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "adb83850aeca29124b8fb19922b3da0f" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "63c6df03c2dc9c6db43e69ec286ffc04" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "54266fd4ecd1b0cff974041d2b4a62a9" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "d97800299b00ddc6509adb4f57c49c65" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.15.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "94f0f60584d4dfb2c470a5dbef430802" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "e96a7973af206c0fa75a6e2f883dab72" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "41ac68a489f978fa35831582905c84e3" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "d1a9526f6e019dc79de4d15e57ef932b" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.2.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "a83d3d3c318fffcd2b0021036ee2fede" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "7bf50a3837a0e772d312eb92bc737bd3" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "dab7f38101c6714355be14670bf5d8fe" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "a856bde074b0989e15a30b3151798183" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "96ed317cbd6be8b95e97a1c3adfe1b48" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "93decd92c34cf19c8f11930156349253" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "72c5d31a76208332a510b51890f86555" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "0bfc3b6477afe87807f00be4c0705ac8" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.4.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "f4c3db7d8ec1725cc57121a100723e13" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "a9d76d7fd5e00bf3256e6b28cd726d38" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "12b6b7e6e456f7bc6d060a10f0f57df9" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "00910a3889248c7ab4eb09f26857508e" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.5.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "a7aa4a83854d8cacabe5c9800e037505" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "f411c46b2efa240562567bba8a28cadd" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "a5e540c8eb66c214bee7390381126584" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "977d0100d7031df1c14bead595edd736" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.6.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "7dd8235c70f38b3462b89b44de972c90" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "512df7b44dafc3a825926c07784d621b" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "f9c25eddf54ad2c175ab403b1c1b9360" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "b2c63bf9de6ff53dcfb26ab9786a8036" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "ac706d7c063a0f1c6416b14a9e326606" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "d6f567a8e87c40b6d83ad420c81cff93" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "df415e24092ae9049fdf4c78060f117f" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "f491e190b47a9c76f66291fc30b9691f" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.8.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "579457b16f8310651627389d69eae035" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "bff3ca6d834142aaab22c1ebe9af262b" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "66578e9a0a6f2bc841715f0b2c9e399c" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "be0323f5885990bc90538484d8c639db" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.9.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "d09dff7e469c0bce0746e015c3cf5f7a" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "0d86ea5b8827875c8cb1d096f60de1aa" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "64920703a8fb8185900e49ed2b3d2351" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "9de30866be4a3fa2f84b10b9596e54a5" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "8d25a4c6596ee8f4a183a8da15b32b4e" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "778d55873ad0def708256ac2c6d1a21a" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.16.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "02718d31999f9a2f40495d6c0f9ae004" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "272ed181ff0bc3a978c54ca374b81c14" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "f96373961841f92ac7aab810b2f9468f" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "acce89842b07ae21562cf374fe25d2aa" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.17.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "24f6261310781d4195e17550d47d6422" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "2b6b477bd8ebac10a8faeb891b3a23ed" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "e0e51fd579faba4fcdc9b3675a4d4603" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "e36bdc4d40b2523b1df72b595843c56e" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "c647f99cf0bc0817202c61922b459623" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "dadf6ea01697c643e0762af7954ffe6d" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "fa5349a20782501424f6bdff9064a7de" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "ee18576580b802b47e75af5a943dca54" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.19.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "b6f77bdb7d16b23a58172d98dd24ac90" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "75e29503b4278735d7bed3b839077ff6" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "42cd30a45ba2e5e346f3da18e33fdd00" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "a80f91403cc298fbf14f84fee5619997" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.20.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "8d88da49ed8e084375f98f772a8c4d63" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "001b097a55976a57599d3bf79400923c" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "5dc88dfee67b5715bd9cde01b81d02e9" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "e66d0180a010bcf3886199c97981dc78" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.21.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "d0f9bc143b05bb246c23d7b59a70d02e" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "17fc584b920c382423dc03a580d8b1aa" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "c7f095a07c7197a8c932b1574e442271" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "d8d8eb068486717a17fe061c705e3c80" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.22.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "c36514ad27dc4b9e383aaf61ce3638ae" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "5e0d7afca1422031a066e0a06918e531" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "85c36b4036eb71430a5f8a0b6305f474" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "2e7aa947a0edd873d9def160ea999c75" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "66dd7ea7fdd869783e44b8c92b74fd71" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "fdb608d80aa4cb4e647b3c366ee5406a" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "2ea16213a91b52d4621f4b31c695b525" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "651d29fac360b4cb4f04c1cde07d2b08" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.24.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "9840318dfe9237d8ce6870f6333d8c77" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "fad99aa3fcb04e14fd018789988cc967" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "cb386e9fda208291406ae8ce77bb140b" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "b55103af8e99eeb0d397da259d2f13a9" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.25.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "5b6b47d2ea7daf277f3101c0bcf26e8b" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "4665d7e469bf8b6cb8a9e674fd0a517f" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "6ccfd6c5d1946cb0bf1417ef30ab1ccf" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "187646567ad0ca904b297a253bf52d81" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.26.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "5b2d993fc82788de5c485433f6bc91bb" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "c05c7ed4b4a52fc58d5209034fd6d050" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "6a4cb5b943d863b86a034065d45eb000" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "866f0a796dbeb66b775967b7943249a9" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.27.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "65be7c89389f65076f286d40ba432726" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "d113d15e6dba4f4cc84940cff7451694" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "39c3d370514fdd948d14bde1023a384b" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "7dc7861e9c73db508c05a40bcb97d7ed" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "e9750137559f9dd30437420a433ec9e8" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "408145aa121bd3693cd825ca7b6f14f9" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "202970fdcbb262aff6b781ab17cce194" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "648b011dc160225c758a28df384a4ada" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.29.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "38103539e59b713bb92cc8bfd1069312" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "a86db11ab81929f0aba80a0834b8eeca" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "e04429e9a57c51dd02496a592934dcd6" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "941a605b675e3fc32665c200ee6a477a" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.30.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "324e39339e4e85cd6c876c8a844b7a00" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "f7fa8e210710f23156622c946d61a387" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "2e5d4748562733e5095f63733fecf426" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "03031dbebf45e8694fe8102bff1ef2ca" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.31.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "4c4804da3ed29253cea51c7ee3a10977" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "0ab5a8f764e56c352fba20402280e244" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.weight", + "shape": [ + 5120, + 13824 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "1301053803e757da8a8daa1b7856b972" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.weight", + "shape": [ + 27648, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "ca8d7553c1d9730656c7d608f63de4be" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.32.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "0c48d9f737a4c87154cf0edd82787f60" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "c898d09cbb13be13fb64b23ace6f4ed2" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "63a2e3a06e27c0ceed77eb0fe211cae0" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "76e672f671b7fb09b5ed051fe090eed0" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 1681408, + "records": [ + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 0 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 10240 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 20480 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 30720 + }, + { + "name": "model.layers.34.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 40960 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 55296 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 65536 + }, + { + "name": "model.layers.35.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 75776 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 90112 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 100352 + }, + { + "name": "model.layers.36.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 110592 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 124928 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 135168 + }, + { + "name": "model.layers.37.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 145408 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 159744 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 169984 + }, + { + "name": "model.layers.38.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 180224 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 194560 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 204800 + }, + { + "name": "model.layers.39.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 215040 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 229376 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 239616 + }, + { + "name": "model.layers.40.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 249856 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 264192 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 274432 + }, + { + "name": "model.layers.41.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 284672 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 299008 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 309248 + }, + { + "name": "model.layers.42.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 319488 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 333824 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 344064 + }, + { + "name": "model.layers.43.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 354304 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 368640 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 378880 + }, + { + "name": "model.layers.44.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 389120 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 403456 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 413696 + }, + { + "name": "model.layers.45.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 423936 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 438272 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 448512 + }, + { + "name": "model.layers.46.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 458752 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 473088 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 483328 + }, + { + "name": "model.layers.47.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 493568 + }, + { + "name": "model.norm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 507904 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 518144 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 528384 + }, + { + "name": "model.layers.0.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 538624 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 552960 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 563200 + }, + { + "name": "model.layers.1.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 573440 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 587776 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 598016 + }, + { + "name": "model.layers.10.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 608256 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 622592 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 632832 + }, + { + "name": "model.layers.11.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 643072 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 657408 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 667648 + }, + { + "name": "model.layers.12.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 677888 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 692224 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 702464 + }, + { + "name": "model.layers.13.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 712704 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 727040 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 737280 + }, + { + "name": "model.layers.14.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 747520 + }, + { + "name": "model.layers.15.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 761856 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 776192 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 786432 + }, + { + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 796672 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 811008 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 821248 + }, + { + "name": "model.layers.3.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 831488 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 845824 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 856064 + }, + { + "name": "model.layers.4.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 866304 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 880640 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 890880 + }, + { + "name": "model.layers.5.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 901120 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 915456 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 925696 + }, + { + "name": "model.layers.6.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 935936 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 950272 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 960512 + }, + { + "name": "model.layers.7.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 970752 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 985088 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 995328 + }, + { + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 1005568 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1019904 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1030144 + }, + { + "name": "model.layers.9.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 1040384 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1054720 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1064960 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1075200 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1085440 + }, + { + "name": "model.layers.16.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 1095680 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1110016 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1120256 + }, + { + "name": "model.layers.17.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 1130496 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1144832 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1155072 + }, + { + "name": "model.layers.18.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 1165312 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1179648 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1189888 + }, + { + "name": "model.layers.19.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 1200128 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1214464 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1224704 + }, + { + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 1234944 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1249280 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1259520 + }, + { + "name": "model.layers.21.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 1269760 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1284096 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1294336 + }, + { + "name": "model.layers.22.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 1304576 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1318912 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1329152 + }, + { + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 1339392 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1353728 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1363968 + }, + { + "name": "model.layers.24.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 1374208 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1388544 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1398784 + }, + { + "name": "model.layers.25.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 1409024 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1423360 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1433600 + }, + { + "name": "model.layers.26.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 1443840 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1458176 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1468416 + }, + { + "name": "model.layers.27.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 1478656 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1492992 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1503232 + }, + { + "name": "model.layers.28.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 1513472 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1527808 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1538048 + }, + { + "name": "model.layers.29.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 1548288 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1562624 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1572864 + }, + { + "name": "model.layers.30.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 1583104 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1597440 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1607680 + }, + { + "name": "model.layers.31.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 1617920 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1632256 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1642496 + }, + { + "name": "model.layers.32.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 1652736 + }, + { + "name": "model.layers.33.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14336, + "byteOffset": 1667072 + } + ], + "md5sum": "c69325134f4bca2125927b848a0c9fac" + } + ] +} \ No newline at end of file