| { | |
| "metadata": { | |
| "ParamSize": 199, | |
| "ParamBytes": 30462466048.0, | |
| "BitsPerParam": 32.0 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 1089994752, | |
| "records": [ | |
| { | |
| "name": "lm_head.weight", | |
| "shape": [ | |
| 152064, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1089994752, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8e23472764e7fb517ee453346e8aec84" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8c6db18f1cf3fdc5d28048dd6c70aa30" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9e1de98c0fdd58fee2554d740adce42b" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8f8fa9d41970a245fbc01147fa77caa6" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4b6830568b373553e07f54ec3bc12b3c" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c008ed49cfd47687244e3e8d4abe1ee4" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f21d506e561c6050b7ab971893ed6a3b" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "439f099f816e9153d1963aaf58dbc41c" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "877dadceebe8712790d2987bc169d9c9" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b2975609465548bde60a48b25ade68fe" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5ec3a38e4e6e8a45127e637c709f3028" | |
| }, | |
| { | |
| "dataPath": "params_shard_11.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c378127cb38bff5470d880ccd1bea4bc" | |
| }, | |
| { | |
| "dataPath": "params_shard_12.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "230acf5353830c9eb93bc7e5e207a63d" | |
| }, | |
| { | |
| "dataPath": "params_shard_13.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1b18368776d2d0a4cca5130745035ccc" | |
| }, | |
| { | |
| "dataPath": "params_shard_14.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "dc035b6fa3a0de2c2d61e55dd0f491bb" | |
| }, | |
| { | |
| "dataPath": "params_shard_15.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "410cea6936ce0166b2341a874813eafe" | |
| }, | |
| { | |
| "dataPath": "params_shard_16.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2ddda62c71460162cfdb74c5b9e251c5" | |
| }, | |
| { | |
| "dataPath": "params_shard_17.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4fd30435bd244b0824db4d1cc0648f61" | |
| }, | |
| { | |
| "dataPath": "params_shard_18.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "84aaf2b9a371658430efd6aad0b6ef7b" | |
| }, | |
| { | |
| "dataPath": "params_shard_19.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b3909377dbb55aa84cb167c3871bc53a" | |
| }, | |
| { | |
| "dataPath": "params_shard_20.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e71051b237626f5102bbfbb5c18617b9" | |
| }, | |
| { | |
| "dataPath": "params_shard_21.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "52b836108a3eb9fe5a62ab65dbd4b165" | |
| }, | |
| { | |
| "dataPath": "params_shard_22.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6883fe88b7215c34d2a669babf401fa7" | |
| }, | |
| { | |
| "dataPath": "params_shard_23.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "67207233c029fa8114b22868c99c2672" | |
| }, | |
| { | |
| "dataPath": "params_shard_24.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c62f9caed8684a355977c7314d847608" | |
| }, | |
| { | |
| "dataPath": "params_shard_25.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cf410e3669ebc206ee7484f16795f19a" | |
| }, | |
| { | |
| "dataPath": "params_shard_26.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2ebc120ed4ae5a237a17abe5299d426f" | |
| }, | |
| { | |
| "dataPath": "params_shard_27.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bd5e025136f1285ea42d28b55d0238f0" | |
| }, | |
| { | |
| "dataPath": "params_shard_28.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5915a7f333e21a9ef149abbf5cb5722f" | |
| }, | |
| { | |
| "dataPath": "params_shard_29.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2136750e3d1fa8fa0736099c75d3cb71" | |
| }, | |
| { | |
| "dataPath": "params_shard_30.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "283db4a52d208ad6be2520a448ff1d13" | |
| }, | |
| { | |
| "dataPath": "params_shard_31.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c2d15ab0a4163b5d240b5a0e98bc7d23" | |
| }, | |
| { | |
| "dataPath": "params_shard_32.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cab920619de251eb2353fcdde78b5458" | |
| }, | |
| { | |
| "dataPath": "params_shard_33.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "931fe3d2a4253e09bea98e7f6a33ce32" | |
| }, | |
| { | |
| "dataPath": "params_shard_34.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "167b2eb4af53bc3d7e89c19ed003d8e1" | |
| }, | |
| { | |
| "dataPath": "params_shard_35.bin", | |
| "format": "raw-shard", | |
| "nbytes": 1089994752, | |
| "records": [ | |
| { | |
| "name": "model.embed_tokens.weight", | |
| "shape": [ | |
| 152064, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1089994752, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "564c95ce617a7b17eaf18a07c2c24124" | |
| }, | |
| { | |
| "dataPath": "params_shard_36.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b5178cc19c946a8363215ee3f4b1c006" | |
| }, | |
| { | |
| "dataPath": "params_shard_37.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "709b000151bdde1de69ef765c8a71d79" | |
| }, | |
| { | |
| "dataPath": "params_shard_38.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "107a9b77cc18c3f8675a6405184ccfaa" | |
| }, | |
| { | |
| "dataPath": "params_shard_39.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "543052a8c32e6f911fb351f0fa328475" | |
| }, | |
| { | |
| "dataPath": "params_shard_40.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "77b4a5e1721b26a72481cdb987e337f9" | |
| }, | |
| { | |
| "dataPath": "params_shard_41.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c6c931c032eb53612d18e319c94f1062" | |
| }, | |
| { | |
| "dataPath": "params_shard_42.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "49fd2bba84708c5e7e50c658e0f7f16b" | |
| }, | |
| { | |
| "dataPath": "params_shard_43.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b723882b129254e0cd67b4ef8bdfb147" | |
| }, | |
| { | |
| "dataPath": "params_shard_44.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "43b20b1eb5055458989f12e8b151e8ad" | |
| }, | |
| { | |
| "dataPath": "params_shard_45.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d4f8ba86980d43b2f5ec1ab5c9e605ad" | |
| }, | |
| { | |
| "dataPath": "params_shard_46.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1eaa1663d991423f308eed2d89958a61" | |
| }, | |
| { | |
| "dataPath": "params_shard_47.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a97dfa1837e124a9e63a6881d95f80b3" | |
| }, | |
| { | |
| "dataPath": "params_shard_48.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d32f62e11762a01cd7841a452841b6ff" | |
| }, | |
| { | |
| "dataPath": "params_shard_49.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ad61da598448c1550ca7f31e8bb7a5d3" | |
| }, | |
| { | |
| "dataPath": "params_shard_50.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "52a4b60c17531633354154bfe0b79c76" | |
| }, | |
| { | |
| "dataPath": "params_shard_51.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0fd3e5f4ed491fc555982ab10e0ec59c" | |
| }, | |
| { | |
| "dataPath": "params_shard_52.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fc641234369f4e0e648a278ce96f6f47" | |
| }, | |
| { | |
| "dataPath": "params_shard_53.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8c66509be27272addfa91dd469fb086b" | |
| }, | |
| { | |
| "dataPath": "params_shard_54.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "704c17475743172e21667016b332d2cc" | |
| }, | |
| { | |
| "dataPath": "params_shard_55.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "efaae29d7ca8d6276731d8f161b5b3c2" | |
| }, | |
| { | |
| "dataPath": "params_shard_56.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e7ed9b2a68b71ee94210378657129493" | |
| }, | |
| { | |
| "dataPath": "params_shard_57.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "af4510f7711932956004ffa2a05ca53c" | |
| }, | |
| { | |
| "dataPath": "params_shard_58.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b88be212b0699d9a99c5d6a864aef099" | |
| }, | |
| { | |
| "dataPath": "params_shard_59.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "61aee84ddb5ef627798ca652290c147e" | |
| }, | |
| { | |
| "dataPath": "params_shard_60.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bbaf0867ff9977eb4c7907c8de9daead" | |
| }, | |
| { | |
| "dataPath": "params_shard_61.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6bcc9798490d3aa8d50431e8d351b5e7" | |
| }, | |
| { | |
| "dataPath": "params_shard_62.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e6ebc0bb43eafbefc4b20aa94174f947" | |
| }, | |
| { | |
| "dataPath": "params_shard_63.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "873882aab122084077534536c1ec7885" | |
| }, | |
| { | |
| "dataPath": "params_shard_64.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3523fc18049be1c7c2dc225f5dd0a402" | |
| }, | |
| { | |
| "dataPath": "params_shard_65.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8032e2cb5fa02f99a51550dd13d2590a" | |
| }, | |
| { | |
| "dataPath": "params_shard_66.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a5f66cfbca0aff71fcb5b94ce8bbda21" | |
| }, | |
| { | |
| "dataPath": "params_shard_67.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2b70b43a3afab4c9403e2c6244cfb033" | |
| }, | |
| { | |
| "dataPath": "params_shard_68.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6a04d785fd9026a2acfa6e6fb38f953d" | |
| }, | |
| { | |
| "dataPath": "params_shard_69.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5add0b37f0f9b635392b90c42ee8be90" | |
| }, | |
| { | |
| "dataPath": "params_shard_70.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "96c559c656394797196a3d7d0b933f3e" | |
| }, | |
| { | |
| "dataPath": "params_shard_71.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "401088d973a18b774e1f8d280d7ccdb5" | |
| }, | |
| { | |
| "dataPath": "params_shard_72.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1a4496eae54694d27487650cc46c9120" | |
| }, | |
| { | |
| "dataPath": "params_shard_73.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1fc9253ad0b55734d67930f9d8f99e35" | |
| }, | |
| { | |
| "dataPath": "params_shard_74.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "813b72fdeb313decc509b4e75e99c964" | |
| }, | |
| { | |
| "dataPath": "params_shard_75.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ca5b52c626eb75f42ba8c91b391c0fa7" | |
| }, | |
| { | |
| "dataPath": "params_shard_76.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f6d89acd3653ec7949625bab572185f9" | |
| }, | |
| { | |
| "dataPath": "params_shard_77.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4644b64496296716ca63ecbb3a2145df" | |
| }, | |
| { | |
| "dataPath": "params_shard_78.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0c66dcb12c84fb9eba7a317030c1897b" | |
| }, | |
| { | |
| "dataPath": "params_shard_79.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a1b2759783dbdcb58b8a9cc3ff9398e4" | |
| }, | |
| { | |
| "dataPath": "params_shard_80.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3d902fe1caf8eff46cb0382d6184f93e" | |
| }, | |
| { | |
| "dataPath": "params_shard_81.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "08cd83dbe142a3538aa9eb93d7942b91" | |
| }, | |
| { | |
| "dataPath": "params_shard_82.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ceff44e728186bfe39b49fe1081301a4" | |
| }, | |
| { | |
| "dataPath": "params_shard_83.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e811e14491b3c2ca232b3692983c0768" | |
| }, | |
| { | |
| "dataPath": "params_shard_84.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a7d18b82b314ab8d852d49069cd904aa" | |
| }, | |
| { | |
| "dataPath": "params_shard_85.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9f938bcdf64679b24b923a18397584cb" | |
| }, | |
| { | |
| "dataPath": "params_shard_86.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b8bc2aa3ce4b800fc00d6e7f17f3b366" | |
| }, | |
| { | |
| "dataPath": "params_shard_87.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d9cfcd718977fc4005996ebb88331445" | |
| }, | |
| { | |
| "dataPath": "params_shard_88.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c10ee877a42d111ef1fa4ce44a797c6c" | |
| }, | |
| { | |
| "dataPath": "params_shard_89.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8eb9777db821834301bb24d64bef2f9c" | |
| }, | |
| { | |
| "dataPath": "params_shard_90.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "41834fd312347e2325cf5503c21431de" | |
| }, | |
| { | |
| "dataPath": "params_shard_91.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5965dfd72b21124b65bbc03b562fef6e" | |
| }, | |
| { | |
| "dataPath": "params_shard_92.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1309c139eece66c532733301009ab307" | |
| }, | |
| { | |
| "dataPath": "params_shard_93.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a78e68f89405d7fdea3cb22ea3466a57" | |
| }, | |
| { | |
| "dataPath": "params_shard_94.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "043866217e957cf52b450d08fdde7e84" | |
| }, | |
| { | |
| "dataPath": "params_shard_95.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9a6dc54aa48dab85c54977a33454c846" | |
| }, | |
| { | |
| "dataPath": "params_shard_96.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "68f3afec6b5a097cbb1a014f3f3ee41f" | |
| }, | |
| { | |
| "dataPath": "params_shard_97.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d6f721133d808ef075257c7a8051113f" | |
| }, | |
| { | |
| "dataPath": "params_shard_98.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f38d229d1afe1cc09d8405562e50405c" | |
| }, | |
| { | |
| "dataPath": "params_shard_99.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e179e87eb7e0351d2b78e610b0ce2ef3" | |
| }, | |
| { | |
| "dataPath": "params_shard_100.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b5fc253d904257dc1e72c090c5f88095" | |
| }, | |
| { | |
| "dataPath": "params_shard_101.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "42baf52f9597aa8a1946d07a0c04a0ba" | |
| }, | |
| { | |
| "dataPath": "params_shard_102.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4945ca5307b4d21d2867d452738eed72" | |
| }, | |
| { | |
| "dataPath": "params_shard_103.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "691d701b5440fdc0d248e26cfb25cb63" | |
| }, | |
| { | |
| "dataPath": "params_shard_104.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3988166086991faf52c5bdc94d60cf1b" | |
| }, | |
| { | |
| "dataPath": "params_shard_105.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7e845db0cd21b1eb61312c00d433a228" | |
| }, | |
| { | |
| "dataPath": "params_shard_106.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "75297cf1dab3eacbb436a889c44dd5f0" | |
| }, | |
| { | |
| "dataPath": "params_shard_107.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "adf46009dcfe63a9934d6ada3bc6a95d" | |
| }, | |
| { | |
| "dataPath": "params_shard_108.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "20406953debce9a6e013ea65cdb78ec3" | |
| }, | |
| { | |
| "dataPath": "params_shard_109.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "565559f4ac57f2badbb6c00c9b4f5aa8" | |
| }, | |
| { | |
| "dataPath": "params_shard_110.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "aad57560586ade2966727028eeb00863" | |
| }, | |
| { | |
| "dataPath": "params_shard_111.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ca273693925a3356a70b94d5123200db" | |
| }, | |
| { | |
| "dataPath": "params_shard_112.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c5505b2f390d8d047a20a4d7710e0ab8" | |
| }, | |
| { | |
| "dataPath": "params_shard_113.bin", | |
| "format": "raw-shard", | |
| "nbytes": 26356736, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.19.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 7168 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 14336 | |
| }, | |
| { | |
| "name": "model.layers.20.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25704448 | |
| }, | |
| { | |
| "name": "model.layers.20.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25711616 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 25718784 | |
| }, | |
| { | |
| "name": "model.layers.21.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25728000 | |
| }, | |
| { | |
| "name": "model.layers.21.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25735168 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 25742336 | |
| }, | |
| { | |
| "name": "model.layers.22.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25751552 | |
| }, | |
| { | |
| "name": "model.layers.22.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25758720 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 25765888 | |
| }, | |
| { | |
| "name": "model.layers.23.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25775104 | |
| }, | |
| { | |
| "name": "model.layers.23.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25782272 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 25789440 | |
| }, | |
| { | |
| "name": "model.layers.24.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25798656 | |
| }, | |
| { | |
| "name": "model.layers.24.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25805824 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 25812992 | |
| }, | |
| { | |
| "name": "model.layers.25.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25822208 | |
| }, | |
| { | |
| "name": "model.layers.25.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25829376 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 25836544 | |
| }, | |
| { | |
| "name": "model.layers.26.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25845760 | |
| }, | |
| { | |
| "name": "model.layers.26.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25852928 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 25860096 | |
| }, | |
| { | |
| "name": "model.layers.27.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25869312 | |
| }, | |
| { | |
| "name": "model.layers.27.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25876480 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 25883648 | |
| }, | |
| { | |
| "name": "model.norm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25892864 | |
| }, | |
| { | |
| "name": "model.layers.0.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25900032 | |
| }, | |
| { | |
| "name": "model.layers.0.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25907200 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 25914368 | |
| }, | |
| { | |
| "name": "model.layers.1.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25923584 | |
| }, | |
| { | |
| "name": "model.layers.1.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25930752 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 25937920 | |
| }, | |
| { | |
| "name": "model.layers.10.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25947136 | |
| }, | |
| { | |
| "name": "model.layers.10.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25954304 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 25961472 | |
| }, | |
| { | |
| "name": "model.layers.11.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25970688 | |
| }, | |
| { | |
| "name": "model.layers.11.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25977856 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 25985024 | |
| }, | |
| { | |
| "name": "model.layers.12.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25994240 | |
| }, | |
| { | |
| "name": "model.layers.12.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26001408 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26008576 | |
| }, | |
| { | |
| "name": "model.layers.13.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26017792 | |
| }, | |
| { | |
| "name": "model.layers.13.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26024960 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26032128 | |
| }, | |
| { | |
| "name": "model.layers.14.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26041344 | |
| }, | |
| { | |
| "name": "model.layers.14.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26048512 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26055680 | |
| }, | |
| { | |
| "name": "model.layers.15.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26064896 | |
| }, | |
| { | |
| "name": "model.layers.15.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26072064 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26079232 | |
| }, | |
| { | |
| "name": "model.layers.16.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26088448 | |
| }, | |
| { | |
| "name": "model.layers.16.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26095616 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26102784 | |
| }, | |
| { | |
| "name": "model.layers.17.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26112000 | |
| }, | |
| { | |
| "name": "model.layers.17.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26119168 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26126336 | |
| }, | |
| { | |
| "name": "model.layers.18.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26135552 | |
| }, | |
| { | |
| "name": "model.layers.18.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26142720 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26149888 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26159104 | |
| }, | |
| { | |
| "name": "model.layers.2.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26168320 | |
| }, | |
| { | |
| "name": "model.layers.2.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26175488 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26182656 | |
| }, | |
| { | |
| "name": "model.layers.3.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26191872 | |
| }, | |
| { | |
| "name": "model.layers.3.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26199040 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26206208 | |
| }, | |
| { | |
| "name": "model.layers.4.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26215424 | |
| }, | |
| { | |
| "name": "model.layers.4.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26222592 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26229760 | |
| }, | |
| { | |
| "name": "model.layers.5.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26238976 | |
| }, | |
| { | |
| "name": "model.layers.5.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26246144 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26253312 | |
| }, | |
| { | |
| "name": "model.layers.6.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26262528 | |
| }, | |
| { | |
| "name": "model.layers.6.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26269696 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26276864 | |
| }, | |
| { | |
| "name": "model.layers.7.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26286080 | |
| }, | |
| { | |
| "name": "model.layers.7.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26293248 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26300416 | |
| }, | |
| { | |
| "name": "model.layers.8.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26309632 | |
| }, | |
| { | |
| "name": "model.layers.8.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26316800 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26323968 | |
| }, | |
| { | |
| "name": "model.layers.9.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26333184 | |
| }, | |
| { | |
| "name": "model.layers.9.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26340352 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26347520 | |
| } | |
| ], | |
| "md5sum": "dfb109929451dcc2fdef2542cfd847c0" | |
| } | |
| ] | |
| } |