{ "metadata": { "ParamSize": 199, "ParamBytes": 30462466048.0, "BitsPerParam": 32.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 1089994752, "records": [ { "name": "lm_head.weight", "shape": [ 152064, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1089994752, "byteOffset": 0 } ], "md5sum": "8e23472764e7fb517ee453346e8aec84" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "bfloat16", "format": "raw", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "8c6db18f1cf3fdc5d28048dd6c70aa30" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "9e1de98c0fdd58fee2554d740adce42b" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "bfloat16", "format": "raw", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "8f8fa9d41970a245fbc01147fa77caa6" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "4b6830568b373553e07f54ec3bc12b3c" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.20.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "c008ed49cfd47687244e3e8d4abe1ee4" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "f21d506e561c6050b7ab971893ed6a3b" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "bfloat16", "format": "raw", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "439f099f816e9153d1963aaf58dbc41c" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "877dadceebe8712790d2987bc169d9c9" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.21.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "b2975609465548bde60a48b25ade68fe" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "5ec3a38e4e6e8a45127e637c709f3028" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "bfloat16", "format": "raw", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "c378127cb38bff5470d880ccd1bea4bc" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "230acf5353830c9eb93bc7e5e207a63d" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.22.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "1b18368776d2d0a4cca5130745035ccc" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "dc035b6fa3a0de2c2d61e55dd0f491bb" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.23.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "bfloat16", "format": "raw", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "410cea6936ce0166b2341a874813eafe" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "2ddda62c71460162cfdb74c5b9e251c5" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.23.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "4fd30435bd244b0824db4d1cc0648f61" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "84aaf2b9a371658430efd6aad0b6ef7b" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.24.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "bfloat16", "format": "raw", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "b3909377dbb55aa84cb167c3871bc53a" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "e71051b237626f5102bbfbb5c18617b9" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.24.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "52b836108a3eb9fe5a62ab65dbd4b165" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.24.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "6883fe88b7215c34d2a669babf401fa7" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.25.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "bfloat16", "format": "raw", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "67207233c029fa8114b22868c99c2672" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "c62f9caed8684a355977c7314d847608" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.25.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "cf410e3669ebc206ee7484f16795f19a" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.25.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "2ebc120ed4ae5a237a17abe5299d426f" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.26.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "bfloat16", "format": "raw", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "bd5e025136f1285ea42d28b55d0238f0" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "5915a7f333e21a9ef149abbf5cb5722f" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.26.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "2136750e3d1fa8fa0736099c75d3cb71" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.26.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "283db4a52d208ad6be2520a448ff1d13" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.27.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "bfloat16", "format": "raw", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "c2d15ab0a4163b5d240b5a0e98bc7d23" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "cab920619de251eb2353fcdde78b5458" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.27.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "931fe3d2a4253e09bea98e7f6a33ce32" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.27.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "167b2eb4af53bc3d7e89c19ed003d8e1" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 1089994752, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 152064, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1089994752, "byteOffset": 0 } ], "md5sum": "564c95ce617a7b17eaf18a07c2c24124" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "bfloat16", "format": "raw", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "b5178cc19c946a8363215ee3f4b1c006" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "709b000151bdde1de69ef765c8a71d79" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.0.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "107a9b77cc18c3f8675a6405184ccfaa" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "543052a8c32e6f911fb351f0fa328475" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "bfloat16", "format": "raw", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "77b4a5e1721b26a72481cdb987e337f9" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "c6c931c032eb53612d18e319c94f1062" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.1.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "49fd2bba84708c5e7e50c658e0f7f16b" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "b723882b129254e0cd67b4ef8bdfb147" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "bfloat16", "format": "raw", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "43b20b1eb5055458989f12e8b151e8ad" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "d4f8ba86980d43b2f5ec1ab5c9e605ad" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.10.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "1eaa1663d991423f308eed2d89958a61" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "a97dfa1837e124a9e63a6881d95f80b3" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "bfloat16", "format": "raw", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "d32f62e11762a01cd7841a452841b6ff" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "ad61da598448c1550ca7f31e8bb7a5d3" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.11.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "52a4b60c17531633354154bfe0b79c76" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "0fd3e5f4ed491fc555982ab10e0ec59c" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "bfloat16", "format": "raw", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "fc641234369f4e0e648a278ce96f6f47" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "8c66509be27272addfa91dd469fb086b" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.12.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "704c17475743172e21667016b332d2cc" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "efaae29d7ca8d6276731d8f161b5b3c2" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "bfloat16", "format": "raw", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "e7ed9b2a68b71ee94210378657129493" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "af4510f7711932956004ffa2a05ca53c" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.13.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "b88be212b0699d9a99c5d6a864aef099" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "61aee84ddb5ef627798ca652290c147e" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "bfloat16", "format": "raw", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "bbaf0867ff9977eb4c7907c8de9daead" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "6bcc9798490d3aa8d50431e8d351b5e7" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.14.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "e6ebc0bb43eafbefc4b20aa94174f947" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "873882aab122084077534536c1ec7885" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "bfloat16", "format": "raw", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "3523fc18049be1c7c2dc225f5dd0a402" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "8032e2cb5fa02f99a51550dd13d2590a" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.15.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "a5f66cfbca0aff71fcb5b94ce8bbda21" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "2b70b43a3afab4c9403e2c6244cfb033" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "bfloat16", "format": "raw", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "6a04d785fd9026a2acfa6e6fb38f953d" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "5add0b37f0f9b635392b90c42ee8be90" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.16.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "96c559c656394797196a3d7d0b933f3e" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "401088d973a18b774e1f8d280d7ccdb5" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "bfloat16", "format": "raw", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "1a4496eae54694d27487650cc46c9120" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "1fc9253ad0b55734d67930f9d8f99e35" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.17.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "813b72fdeb313decc509b4e75e99c964" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "ca5b52c626eb75f42ba8c91b391c0fa7" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "bfloat16", "format": "raw", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "f6d89acd3653ec7949625bab572185f9" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "4644b64496296716ca63ecbb3a2145df" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.18.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "0c66dcb12c84fb9eba7a317030c1897b" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "a1b2759783dbdcb58b8a9cc3ff9398e4" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.19.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "3d902fe1caf8eff46cb0382d6184f93e" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "bfloat16", "format": "raw", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "08cd83dbe142a3538aa9eb93d7942b91" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "ceff44e728186bfe39b49fe1081301a4" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.2.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "e811e14491b3c2ca232b3692983c0768" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "a7d18b82b314ab8d852d49069cd904aa" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "bfloat16", "format": "raw", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "9f938bcdf64679b24b923a18397584cb" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "b8bc2aa3ce4b800fc00d6e7f17f3b366" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.3.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "d9cfcd718977fc4005996ebb88331445" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "c10ee877a42d111ef1fa4ce44a797c6c" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "bfloat16", "format": "raw", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "8eb9777db821834301bb24d64bef2f9c" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "41834fd312347e2325cf5503c21431de" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.4.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "5965dfd72b21124b65bbc03b562fef6e" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "1309c139eece66c532733301009ab307" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "bfloat16", "format": "raw", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "a78e68f89405d7fdea3cb22ea3466a57" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "043866217e957cf52b450d08fdde7e84" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.5.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "9a6dc54aa48dab85c54977a33454c846" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "68f3afec6b5a097cbb1a014f3f3ee41f" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "bfloat16", "format": "raw", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "d6f721133d808ef075257c7a8051113f" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "f38d229d1afe1cc09d8405562e50405c" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.6.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "e179e87eb7e0351d2b78e610b0ce2ef3" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "b5fc253d904257dc1e72c090c5f88095" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "bfloat16", "format": "raw", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "42baf52f9597aa8a1946d07a0c04a0ba" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "4945ca5307b4d21d2867d452738eed72" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.7.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "691d701b5440fdc0d248e26cfb25cb63" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "3988166086991faf52c5bdc94d60cf1b" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "bfloat16", "format": "raw", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "7e845db0cd21b1eb61312c00d433a228" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "75297cf1dab3eacbb436a889c44dd5f0" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.8.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "adf46009dcfe63a9934d6ada3bc6a95d" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "20406953debce9a6e013ea65cdb78ec3" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "bfloat16", "format": "raw", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "565559f4ac57f2badbb6c00c9b4f5aa8" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "aad57560586ade2966727028eeb00863" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.9.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "ca273693925a3356a70b94d5123200db" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "c5505b2f390d8d047a20a4d7710e0ab8" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 26356736, "records": [ { "name": "model.layers.19.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 0 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 7168 }, { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25690112, "byteOffset": 14336 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25704448 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25711616 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 25718784 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25728000 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25735168 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 25742336 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25751552 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25758720 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 25765888 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25775104 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25782272 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 25789440 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25798656 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25805824 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 25812992 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25822208 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25829376 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 25836544 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25845760 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25852928 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 25860096 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25869312 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25876480 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 25883648 }, { "name": "model.norm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25892864 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25900032 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25907200 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 25914368 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25923584 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25930752 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 25937920 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25947136 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25954304 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 25961472 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25970688 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25977856 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 25985024 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25994240 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 26001408 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 26008576 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 26017792 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 26024960 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 26032128 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 26041344 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 26048512 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 26055680 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 26064896 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 26072064 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 26079232 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 26088448 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 26095616 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 26102784 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 26112000 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 26119168 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 26126336 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 26135552 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 26142720 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 26149888 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 26159104 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 26168320 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 26175488 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 26182656 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 26191872 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 26199040 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 26206208 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 26215424 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 26222592 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 26229760 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 26238976 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 26246144 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 26253312 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 26262528 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 26269696 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 26276864 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 26286080 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 26293248 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 26300416 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 26309632 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 26316800 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 26323968 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 26333184 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 26340352 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 26347520 } ], "md5sum": "dfb109929451dcc2fdef2542cfd847c0" } ] }