diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,10826 @@ +{ + "metadata": { + "ParamSize": 746, + "ParamBytes": 54018692608.0, + "BitsPerParam": 15.7532493060186 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 2819260416, + "records": [ + { + "name": "language_model.model.embed_tokens.weight", + "shape": [ + 262208, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 2819260416, + "byteOffset": 0 + } + ], + "md5sum": "9db8ab4739fbbccb2cb6855cdb173a48" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.0.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "525fc86770c2c1dfd57cbc66bc8fb435" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.0.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "73d7df5cd22325f68e8536527f6c87bc" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.0.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "60e495d83a82158ea2a7eab3d380ffd2" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.0.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "4e8a168918ee3feaf3c4b9b6eb07f22d" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.0.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "542fdec345b58f30e97e35a615c3ecd6" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.1.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "c9df0c4356e570df2d82d35576240ce8" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.1.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "1e5d02fb93bd00867dc1727204255247" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.1.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "605025cfc534dc7b2f4eb30f6adf27d7" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.1.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "cfd960cc2494e6d5405c0619b770297c" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.1.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "3295bd1aa0fe848d2575821bd1c15589" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.1.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "e061c178964166ab3f0eb2bb515379e0" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.2.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "d2b6722fef2c4abfb941f1729fde8491" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.2.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "cc6a9617f7282d40bdf7fa29d55d014b" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.2.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "202dd37b5e92168248863d3017fd208e" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.2.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "96381bd07aa882fc79a624dd5241c9d4" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.2.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "2af53721b9c9bee13716b541773103e6" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.2.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "93a711d1af357b5f09a60302e72fc1c3" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.3.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "b58d22e438da2808e350dcb1cef7ee06" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.3.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "0b25dfd7d47e1b58d00839b39bb22511" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.3.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "61e38e88109d280140af4cc089466581" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.3.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "7493a78c7df233dd12c49e97a55973a0" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.3.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "d86926226a4e1bd0f039f73796e8f0d8" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.3.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "27c6c850b952f319ad783f24dba59f89" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.4.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "74f7650e4cc77e0ecad4c46ec73120b6" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.4.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "0a07c97fe646ac7b954338ade3bd2abe" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.4.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "72ba60b2ce8915df4bffb21b78e83893" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.4.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "c56dc92979fd82c0a2ca06aa17c456e6" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.4.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "4a89ae09f6a5167a14ed3565feca3d42" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.4.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "048dad50c5f79948545675b23e0c2d58" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.5.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "28bb916cdac511be9bbcb10104ebab6d" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.5.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "7c4fc6cfcd19ba92e329205c7487c0ba" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.5.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "2024ae70d68a0ae79c1a577da90f3fd5" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.5.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "a46e97b17fa9d980107caa532a573031" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.5.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "23d3b110923d0d252af0610a2c57f7ed" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.5.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "43c0426c8ed1acfbb965cd9e62774a8f" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.6.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "0db13e05225efc0c978db64154ae6929" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.6.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "f228a2fc11f6e81c7a4e617ca4c16fe3" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.6.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "bcee9882da0f3ab7382ec850ef03c969" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.6.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "4b12852b7c49699bd544e4ff3ec6a218" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.6.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "9c14e70bd2cd2fb23a5ea0dd62f0fa63" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.6.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "3f6dd555da7db2858ce4784e1fa4bcd2" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.7.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "663cbfa567fedf3aa034a5431f8761fb" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.7.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "e80c24213fd40364f6ac75d1b089e614" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.7.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "6c83281f394ac14b6dedc0d7275a491e" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.7.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "2dfc9010a30b52f563a4b22bdf9eb941" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.7.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "b72c84204532d12df085cb4e83c98d99" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.10.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "6543c383f9d708af7e7963e6a67086e9" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.10.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "ece300ac24307f4d356ce41e74ea1159" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.10.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "449db1501d122edef9f7a62ed1d08059" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.10.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "27990155cb15d499ccea5724b8214966" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.10.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "0ccd835f6869f2cb474093afff09ff73" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.10.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "53bd053ae7d11cd13df3a35cb106b705" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.11.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "7ec9337ce3f794e6267942bbdddd6f83" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.11.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "98e8936df67f43b4cc4d240899529b26" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.11.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "db24a0a743cd6981fab158491773e613" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.11.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "61bee7038062bc214bffcac584ad0ae4" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.11.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "f1e72ac36873f31d7aacb01593d9ab35" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.11.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "74ddc3f49f2ebc57c8d21677172a2422" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.12.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "2d2d34fe562c1c4b3217b7bce563f625" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.12.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "0e6a642f5003063f8716b3bfa5afb36c" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.12.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "4d8f56196ab36e6e453115d810994a2f" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.12.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "01174c22ea62d461d4685ea29014b539" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.12.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "24dc0cc912ac34230f77863adc67025d" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.12.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "8dd24ca48d83d782951b984266f0a6e9" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.13.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "f7f9f2b4965fb16a85a50ad80d3fe9d4" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.13.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "e2a18ccb39d75bfc703316092c4348c0" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.13.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "dc9c910615fd2b0eeb1746c262cab7b9" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.13.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "aeed325363d1d7947699cf592c1c88b8" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.13.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "fa21c7b277cdde6e19f4368e9c9ec7eb" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.7.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "1b162f4b133b4f923f4f3d13bd4168da" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.8.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "05c479841e3ba089fc4968636dddf17f" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.8.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "b46acd034f202fb2346da49a2a275fa6" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.8.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "7f4a7b05471c85ee8ee721ce566a6edc" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.8.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "59fe3425f3748f50eeaedb428bf2fb69" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.8.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "d8ae82a7ab6baeaf14d926c7e7c1cf6f" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.8.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "c907fd214c0adc4bc07996f86f5ae389" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.9.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "10cb5e70137cec1e278c379a9b7d71c7" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.9.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "87c72e0b0531ed2f7e0956a398913b2c" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.9.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "8b1e5ffb2bd855995d4db0d41434182e" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.9.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "cb6e67ff98a52b1382d5804cc9a23fc2" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.9.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "7b881ac328ff29fb4cd42dd2816b550b" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.9.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "a1402e8f5aeb1098fb532287a6c60ada" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.13.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "ce26665a542ccd8728ccd5c1e151b4b0" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.14.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "3ac9d18c0b6a969979fe03dcee922dcb" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.14.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "83e11efba4ce4c9abcbedc93828afecd" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.14.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "a3a7c439becec2d2d21ec6d98ad36e69" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.14.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "c3e09250c59c417a510423d2385a25e9" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.14.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "f17aa0fb82b3e0a91ce065c56992a8cd" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.14.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "7c58d43faacb35f1fe3f38ebaa4af09c" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.15.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "c066d755a177a9f8a09f50d10fcf3c4e" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.15.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "2e0d1b05ed5168a9911f4ef024b619c0" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.15.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "3e89accfdc75b39d4742e769c57d9917" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.15.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "cdf9ff981a084f3a17c712e1303297c9" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.15.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "6dd1ed55f0469a10fe1b86efb948787b" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.15.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "309c986aaaf64b01f23fe087adab56a6" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.16.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "c298b583fd0e6f5a42bff2093e082f77" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.16.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "7d9655333609a3eef1aae02ba8ed9745" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.16.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "1253ee38620aebf013772ffb9cdd0728" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.16.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "8b2b287605e4b4d7df18a7b62a41102e" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.16.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "d01aa66d27ff0945296718ae1382d211" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.16.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "c9728ff49f18d9d5aea35e5c70d26233" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.17.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "fc660f4240c7f51056748502baac05b0" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.17.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "d2a0016359f4122f7dc5a029c4cdbed7" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.17.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "5c93cee987e87fa46510bfffd7304e58" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.17.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "ec90247544470a2d0747397210e2e70a" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.17.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "93502d845b53da9e94f2fd3a87b79d2d" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.17.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "dbaa4c974e865d09eb19100de817fead" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.18.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "66522f3ee2f5ebbccd3be6cd748ea3e4" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.18.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "a0c3b8d9a54724869200dee707bd5a46" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.18.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "d9f757df4d48d1595139c007be3f139e" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.18.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "dbf83b1676312570494ba169c9c159d1" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.18.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "15a6b51b07d9660411af1f45ee63a3e9" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.18.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "9a086d27456b2ab951c6f681ac6dae30" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.19.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "b84fa017d1c99095b988f9d11d76b9ba" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.19.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "1b180d88cacceb5e103f3ab5c4a0b4c0" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.19.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "caadabe77eb70990cbad0e1c3333cbeb" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.19.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "58b199e0255efdb13e3b08aa67939e67" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.19.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "d914f3d40926473c5eb49b3c38edc8a3" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.19.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "54d0f7ab0c6d0b3d170c0caeead2f796" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.20.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "9799985f00abdb9c2d40fa49e7033b57" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.20.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "c0aade48379c94f4f250c223bbbbafab" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.20.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "28c56307fb4e8058bb622444dc30eb95" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.20.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "0d89d786451bf31a85170b7c4cb06108" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.20.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "20b226d71fb6fc51dcc2ba8353bbdf74" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.20.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "9c3b9b684504a34499f9c70d6de28c53" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.21.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "1b5e2d740d7afa2b03d3eb18641e16ac" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.21.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "3a7ed5d6755afa6661c67a0346f963c5" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.21.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "0f8d9db25e0b0d5f06364e3d34d42b0e" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.21.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "3809de0bf8fa5668d09741b45746b3ec" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.21.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "139ace6ca9122d462f9ee61461ec77de" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.21.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "1e47c265c8fea348de9ec378ee5b54b1" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.22.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "8858ce6e5a09c84fe139b831154883ce" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.22.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "8f3ddb7652eb609343b8c69143603143" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.22.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "f3f51c08bc5a14cc6621e14885262406" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.22.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "e6319eab95e0a155511bfdc6870d3563" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.22.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "48beff52a3ce052b1b96e3ad1b44ab54" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.22.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "4c0108b648640ccefd3ec84568d06241" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.23.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "10b69956b8936c2fb398263b6a3204a8" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.23.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "fed326d8d93fa618daf1fdde980d8d1c" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.23.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "627724b07978af1fbffc500482b996b6" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.23.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "b032ee45f47d6078667f77d30a5df8ec" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.23.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "d7ee2b26f1ef82a3fcee30c89b76aa23" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.23.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "7595e13dfcad27df179c0c327d79ce09" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.24.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "4055217607959407f5e10262e4919de1" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.24.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "98486a7cb728eb866c70a60e9c841f20" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.24.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "75f690edb7ef8707993d156a212ed943" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.24.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "13c08b290fe6dc16f47079469f10bb32" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.24.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "4caa30fefdb36185a0c81af891d2539f" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.24.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "8480071dea961caf0a12b360e58c867d" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.25.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "9a321dc09700c7ec2f17e96acbdf6958" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.25.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "1c801d1145a81654636e64f025d8a261" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.25.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "daee189698f1085d78a8a5cac6291a07" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.25.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "2ed89537e1091c8773fbdfbf96cb507e" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.25.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "392dd9638099abdc6ec4e40ef02b201d" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.25.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "0321ed4d141d7e2ce063d4daf02c05f7" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.26.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "caa9929debb0a9f6b3f994ec902c5e5a" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.26.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "c6778bd43069b009f8f9fabc9eeeadb7" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.26.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "6b7ad6a017622597bfb102d9f250f72e" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.26.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "4ea2fcd353f719f33c1e8fcad13cefe8" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.26.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "221aef305f36681aea804091a21f7e0c" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.26.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "3ff56bd121cd8006cd18a7209b1a0ede" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.27.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "1b602776802de78eeb719c04abfbb212" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.27.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "8abbbdd1513032f2b9990d7228fd7af9" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.27.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "77f9efcca45b9b12afd4a3784b39b814" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.27.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "b6dacac406698fde51ebd963e57b2d4f" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.27.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "6e74312465f6bb89ea94fac1311d3240" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.27.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "4feff801f8633c3d6960f82029d63b6f" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.28.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "fc1256bb8e61b47fd00a727f9a042f74" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.28.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "33c9ec7cab154e617175ad0fbe91c65f" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.28.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "6b7ce422d2b030de9a3c2262ae1f6b89" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.28.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "84007ecbe9e53e0de3f9a5037b98cfa5" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.28.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "c76948f9dd22f67dcedd2e1dcbf382e1" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.28.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "075f438aeeb3f8a7094efae18996110b" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.29.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "2fcf5b0bb799f6bf13becda06ae06841" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.29.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "6bca3cd6f3f44171384c37b7f8e162d3" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.29.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "78ecbfd4f7e30667355f1f9ef9322259" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.29.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "f3f0ca333ab807828d57425b9252a594" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.29.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "350eafc807b900835d9f07489a7b633c" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.29.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "a1485335cfb49be794e53d6b618bb4f4" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.30.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "b69ef216c4395e42f204a559e24c8258" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.30.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "09a5023ee7064f9f5d6e546c6f39392a" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.30.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "753ffaf2a2391b76e376a3e3aba1d1f0" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.30.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "08cd100b06600f8c2ad1dc2bef22c29a" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.30.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "4138944efcd82635619b42d210dd6583" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.30.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "952fcb9fb70f0fe1d6bc3ad91aa52713" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.31.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "a8de273bcd100bd99232358eded06b13" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.31.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "5a996be6be4860df65b9c3afb0a615d7" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.31.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "3c66dce37c5f4e126912abfe8e4b9ffe" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.31.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "4df56c71b50767549c43480afbb316cf" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.31.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "69d6110a780fa2645fe0823e37f4b06f" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.31.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "90fe9a351094f355e322fcc3a04aca3e" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.32.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "97f0d93a0f4e737ec9df3f86f0803d58" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.32.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "084f5b152520afebbbcf1a8f22881c07" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.32.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "3fbbc9b07802e8c88abbf38f830472b6" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.32.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "b3bb719fde10642b0e8cf8b59d9a453d" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.32.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "655c754845f5537da4abfe0a28716b9f" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.32.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "bb568b08ad78b65e5c911d2c41c151a4" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.33.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "aa2d84b9baf3351572de35abbba0b568" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.33.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "639fcbafeea79891ad952f2b32df2380" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.33.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "aa9715ca162dde2902eacb2cca70e92f" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.33.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "3e9f3e7420b9d517f85231fc5eca28fe" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.33.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "f494c6801f82fc9e2b922d2cfa5431e4" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.33.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "cdc03983f358d56301536296ad80f19c" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.34.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "6ee7233a7868abaf572d87a5689c7771" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.34.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "a579ddc61de21f5a6181e6ac6d38be3a" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.34.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "79a46dcaf135439fa3ba570b83021c13" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.34.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "842343dd6da3713548002dfb750abf33" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.34.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "3dd0edf60a548a845e9418711ca3cb4d" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.34.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "0f73541485dae4510502212dd92f8b64" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.35.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "0b2f9848089b98bb6edded1111009ec9" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.35.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "aed2854fe62122531e0b252e86255ffd" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.35.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "eb6aba8997dedc363d4968525a87cbc7" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.35.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "903a23007fc7b8449592b17faec5a035" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.35.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "99e52e7d174601ae06c09481af4aa1af" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.35.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "caf34895046736ba5678e67b69a0306d" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.36.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "5e6afcff5bde0ab64f486080b6f9492c" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.36.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "f324fd9669c3594463de8ab2e63276eb" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.36.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "7ed85b58401abbc1e1c3122bfd3df7ea" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.36.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "90c58356e1ebca66e1b8d5113694793e" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.36.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "5ed40db60ee89fbd16303b1e7e152e60" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.36.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "6811e8d91a307278c8018464c73337d8" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.37.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "4acc08ffdf17838b5dbb5fe5ca9b1791" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.37.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "6ae03f31b394443cfe6ad8cff9024721" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.37.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "772bdcc32bee1a152154b77d76d1b14e" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.37.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "f6ad1652f99cee122fb5567e60d96308" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.37.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "a476ce510cab9de736b9ba7631c53869" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.37.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "08bda377b65cbd5e13aad999746988a8" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.38.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "c594e5100c3861601cbab39af15b5710" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.38.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "1a12dbe2332420324e483faa38ebdad4" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.38.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "8dd14761fd6f8b79d14a0384a85af179" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.38.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "b271e09758b01c948e6b3d8876211750" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.38.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "22b285555417a2bd3b0db2da17b0fd26" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.38.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "dc1c3b1747775eee05407863728e0f30" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.39.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "563a43570dbb8e566ea77c0a50cdf025" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.39.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "93b0ad783dfe2de502ec2f6bd4811f5c" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.39.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "945a946249307a8f15022d654d1c6a16" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.39.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "0a606433704495f32764e8ec62021043" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.39.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "d4e6b670503645294cbb17b64ee0b048" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.39.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "cefa69e5d80f02b1f4d6a52f22ce0f6d" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.40.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "487ac41ce845def31393927de1bd0356" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.40.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "321d357876415f86f200205d7bcd65a0" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.40.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "9f1ed64a0c6a0979aff085b2930451b4" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.40.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "ec3f800cfbd7c6d5185e9a5cd310c18e" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.40.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "df93963a6f66a69d37d747c648264ddb" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.40.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "263a081fa872728895a72729f1af8d5f" + }, + { + "dataPath": "params_shard_246.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.41.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "f2942f151a98dbd179da6bf5c9ecb1ab" + }, + { + "dataPath": "params_shard_247.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.41.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "29161228570c28d6c0d779bd6a68d344" + }, + { + "dataPath": "params_shard_248.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.41.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "ed83c6a7679797d5d7f02b8a8af1a472" + }, + { + "dataPath": "params_shard_249.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.41.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "64bd81e5358736230620296e25109d56" + }, + { + "dataPath": "params_shard_250.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.41.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "0163ed5fbecd8b78aa258a43eb9046c6" + }, + { + "dataPath": "params_shard_251.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.41.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "ccc5c5b461c98b9327b8a22730fecbf7" + }, + { + "dataPath": "params_shard_252.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.42.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "fe7076688577ab9872aecfc50255aac6" + }, + { + "dataPath": "params_shard_253.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.42.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "31a1aa293c7eb02fe403cf3f318630a6" + }, + { + "dataPath": "params_shard_254.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.42.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "9477d3c8ec0716f777332eeb29b2339c" + }, + { + "dataPath": "params_shard_255.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.42.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "ca9bc850e95136bcd2a9df4cac8c6500" + }, + { + "dataPath": "params_shard_256.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.42.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "f08e379b1b91e7c0c9e0634d776a641b" + }, + { + "dataPath": "params_shard_257.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.42.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "72557b2eac5b951f71e524b179c3d884" + }, + { + "dataPath": "params_shard_258.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.43.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "93937c9f141472e3bde0d515fb24b778" + }, + { + "dataPath": "params_shard_259.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.43.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "27e335cf3151a07b2272a093e56f13c6" + }, + { + "dataPath": "params_shard_260.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.43.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "b71a50bcdb3350e8eabf68de1be56ac5" + }, + { + "dataPath": "params_shard_261.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.43.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "a7d481455a955c4fd60f652a2672379b" + }, + { + "dataPath": "params_shard_262.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.43.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "60f66ab858cf972047cc69ed95437c89" + }, + { + "dataPath": "params_shard_263.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.43.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "19c951020957ea52da9064bf5d1b1beb" + }, + { + "dataPath": "params_shard_264.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.44.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "024c75324cc3d57e54b1b824ebbe1739" + }, + { + "dataPath": "params_shard_265.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.44.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "ea40c196888c318e3dc88a7ec5d1a4ed" + }, + { + "dataPath": "params_shard_266.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.44.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "21ff7755816281f12769a055f20cc348" + }, + { + "dataPath": "params_shard_267.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.44.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "f377d0c2bd740cd180e7ad10fafc137e" + }, + { + "dataPath": "params_shard_268.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.44.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "98ba9e171c649533b69fcfa55865b00e" + }, + { + "dataPath": "params_shard_269.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.44.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "608852e06215293c07a18426265f3550" + }, + { + "dataPath": "params_shard_270.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.45.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "16c75a10c4ae446bae4694f2bdbb0b01" + }, + { + "dataPath": "params_shard_271.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.45.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "f985095d0872d9601862d29ed8a937ef" + }, + { + "dataPath": "params_shard_272.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.45.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "b0c22adc137dd01910999d2e79f950e9" + }, + { + "dataPath": "params_shard_273.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.45.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "145e834de39247b3e77fc92ad0f183ae" + }, + { + "dataPath": "params_shard_274.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.45.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "4a1f90c0f521e987fd46d645023f85e9" + }, + { + "dataPath": "params_shard_275.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.45.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "33eed963fe8e9d328ad97941de5c4307" + }, + { + "dataPath": "params_shard_276.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.46.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "839225a3dcdbf268873f93bc6de4d709" + }, + { + "dataPath": "params_shard_277.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.46.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "5d401d09d804be825901813e1d8f624a" + }, + { + "dataPath": "params_shard_278.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.46.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "66f83e66469bfd5abd1d14e2ef85c5de" + }, + { + "dataPath": "params_shard_279.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.46.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "e3e0bfcec4c783d180b8bd55be8c3b4a" + }, + { + "dataPath": "params_shard_280.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.46.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "280d6c978fc3cf2f3c90cfdfabf7bc9a" + }, + { + "dataPath": "params_shard_281.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.46.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "6b8a8979b086362bb54e9491706f1e8c" + }, + { + "dataPath": "params_shard_282.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.47.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "6ef5d55080c0fcfc858b85ae1bb4fe71" + }, + { + "dataPath": "params_shard_283.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.47.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "988e6dd4b1437a20588125d091df66d5" + }, + { + "dataPath": "params_shard_284.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.47.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "83f3d6031a2117d5cc7d13dbaf013134" + }, + { + "dataPath": "params_shard_285.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.47.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "6e03d73ce895f85dadc128f759b713ee" + }, + { + "dataPath": "params_shard_286.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.47.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "91e8511f2759ec1a9d8a8b85e0f60db1" + }, + { + "dataPath": "params_shard_287.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.47.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "ba7c1b765b964f76adb93bd59601a139" + }, + { + "dataPath": "params_shard_288.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.48.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "9b2f6f309c4ff81f5fce52cc4d4efe3a" + }, + { + "dataPath": "params_shard_289.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.48.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "7a25006b0dcc975d566516e2510914b4" + }, + { + "dataPath": "params_shard_290.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.48.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "49bf7b6fd54f58cf48d7a45437589a43" + }, + { + "dataPath": "params_shard_291.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.48.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "b45b6f6e603b8f79745e25f771c9b4b9" + }, + { + "dataPath": "params_shard_292.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.48.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "ba4d2b427c2558bdab23eb275f4d24ef" + }, + { + "dataPath": "params_shard_293.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.48.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "d7c5b260b3fc2a36eb061eb676abc2af" + }, + { + "dataPath": "params_shard_294.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.49.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "765f336eaf9db7d84695f1b27af5b8d6" + }, + { + "dataPath": "params_shard_295.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.49.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "708facddf031faa14470d487fb8fa54c" + }, + { + "dataPath": "params_shard_296.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.49.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "169e8c71f60def0609d677e7c4a3316c" + }, + { + "dataPath": "params_shard_297.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.49.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "144180921027cc0ef07e5cda42355b81" + }, + { + "dataPath": "params_shard_298.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.49.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "8fcebc0a25ecaeaa58bc5d76ede10df6" + }, + { + "dataPath": "params_shard_299.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.49.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "ec4723d2ed763c3b459edb7f6d404a64" + }, + { + "dataPath": "params_shard_300.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.50.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "a83f50536d8ac9328ea0336987d5c6e8" + }, + { + "dataPath": "params_shard_301.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.50.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "e2a15f67ea3b0f0637cc0592219c9a11" + }, + { + "dataPath": "params_shard_302.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.50.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "d30322d17fb2cbed4c9270708f0a7bbb" + }, + { + "dataPath": "params_shard_303.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.50.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "a428ae82868587076a3a715c81df4599" + }, + { + "dataPath": "params_shard_304.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.50.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "ba69f6d3ccca513ed3f2f175203ee8cd" + }, + { + "dataPath": "params_shard_305.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.50.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "52062418c8de7d53a917880689b5f5ff" + }, + { + "dataPath": "params_shard_306.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.51.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "06b79d56d0ad98553730489092ca2fbe" + }, + { + "dataPath": "params_shard_307.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.51.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "092529b2af513f960e7ebb4c64ead70d" + }, + { + "dataPath": "params_shard_308.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.51.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "a923096e4b2ff0b24c952e05ac9906ed" + }, + { + "dataPath": "params_shard_309.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.51.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "17cb8d60e03e8098947767549f7f96d1" + }, + { + "dataPath": "params_shard_310.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.51.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "f43c832e8a1af5b6c08c8df7a519d12e" + }, + { + "dataPath": "params_shard_311.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.51.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "ea7753144e764c3ead071d82f8718201" + }, + { + "dataPath": "params_shard_312.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.52.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "8b72bfa8253bcecf9b70ecd7f55e3eeb" + }, + { + "dataPath": "params_shard_313.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.52.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "8e8f0002a526b683bad22ca81a92ffd6" + }, + { + "dataPath": "params_shard_314.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.52.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "3f57f322c8db7cfcf838f9ee4da2777d" + }, + { + "dataPath": "params_shard_315.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.52.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "dd9a5361d3381e0239c8862daa2a776d" + }, + { + "dataPath": "params_shard_316.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.52.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "0afea4559fb2bc1c8bb0ada806238b3c" + }, + { + "dataPath": "params_shard_317.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.52.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "71612d0a63b04221d375f69a59053474" + }, + { + "dataPath": "params_shard_318.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.53.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "0829779b7f1462077909bcf85d3dab7d" + }, + { + "dataPath": "params_shard_319.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.53.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "f6cf2763a62a47d02ec8e37e2d2352ab" + }, + { + "dataPath": "params_shard_320.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.53.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "b4adbc76ceb9e9fbaf78676688f3cfc1" + }, + { + "dataPath": "params_shard_321.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.53.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "a7d4e7345fc5ab1ccd1da40c5b43ead3" + }, + { + "dataPath": "params_shard_322.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.53.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "9429bf2f1d77c7a769132831cfd5da74" + }, + { + "dataPath": "params_shard_323.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.53.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "9ba679a6b350505081c7e5d792e5204c" + }, + { + "dataPath": "params_shard_324.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.54.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "a39451ef52e787587f5924ab58f5493d" + }, + { + "dataPath": "params_shard_325.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.54.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "b0ddc17d43ec4ce5c357c97c306ff53a" + }, + { + "dataPath": "params_shard_326.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.54.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "b1c416eeb1512702cf0256c441003c07" + }, + { + "dataPath": "params_shard_327.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.54.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "e0ee88a7ab9cd03d904063571e38542b" + }, + { + "dataPath": "params_shard_328.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.54.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "d1e887b5af17dc5948c23f4cc95981e9" + }, + { + "dataPath": "params_shard_329.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.54.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "ccc0d71880ed25ef6823fcd8d297e5d1" + }, + { + "dataPath": "params_shard_330.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.55.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "31a54f3fbfdca781be13dd6238f4d7a6" + }, + { + "dataPath": "params_shard_331.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.55.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "205c6a8fb59ef20d609ddd3a816279c9" + }, + { + "dataPath": "params_shard_332.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.55.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "fc695ff4b3d5660f6c829f92a2abf7ac" + }, + { + "dataPath": "params_shard_333.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.55.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "fe128fc174744ce0a338b54140d0d5bd" + }, + { + "dataPath": "params_shard_334.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.55.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "80aa9d8cccd1e8c82d0d39ba58c7e83f" + }, + { + "dataPath": "params_shard_335.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.55.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "8423fe95f70c0e5635f69f804e7fdbba" + }, + { + "dataPath": "params_shard_336.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.56.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "cdb857c2ca3a225a9698f47f50f5477f" + }, + { + "dataPath": "params_shard_337.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.56.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "56e86a5a03d3ecb163f1935f9c9ac36e" + }, + { + "dataPath": "params_shard_338.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.56.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "9012d2d3a34e872b5a346d6adecd37fa" + }, + { + "dataPath": "params_shard_339.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.56.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "6cdebc0812557695ec6e1e43baa2926e" + }, + { + "dataPath": "params_shard_340.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.56.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "974a10889dc290f03cd2b44966686f54" + }, + { + "dataPath": "params_shard_341.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.56.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "4684ec332246df3dc21b0e783bc129b7" + }, + { + "dataPath": "params_shard_342.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.57.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "5b4fbb84e25b940eddc62e1945734cbc" + }, + { + "dataPath": "params_shard_343.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.57.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "5276a0f5ff1ea371e17e6a9dd5d5fa8b" + }, + { + "dataPath": "params_shard_344.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.57.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "3706f904af696642a60a37544b77bd41" + }, + { + "dataPath": "params_shard_345.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.57.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "8705b8034e489b28e32234ea90115344" + }, + { + "dataPath": "params_shard_346.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.57.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "e4c81930e56ab07413b11ac89118dc7a" + }, + { + "dataPath": "params_shard_347.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.57.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "3202ff811c56a803f509fb85ef72f042" + }, + { + "dataPath": "params_shard_348.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.58.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "6f4599b40ef9e96f4c642b0d18788ecc" + }, + { + "dataPath": "params_shard_349.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.58.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "3d98121d54a5f76ef4de1818d192ef98" + }, + { + "dataPath": "params_shard_350.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.58.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "c1537f8418da0df05b0dec02b19ee3fe" + }, + { + "dataPath": "params_shard_351.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.58.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "41f9946a954c8bed3b193196e976a06f" + }, + { + "dataPath": "params_shard_352.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.58.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "9613f7050256178e7cfc8f37299f4242" + }, + { + "dataPath": "params_shard_353.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.58.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "a666fb5b8c6a2811470c0cb8b3ba1ec2" + }, + { + "dataPath": "params_shard_354.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.59.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "1cd2669b7f85cfc0173f34b7892ae4b3" + }, + { + "dataPath": "params_shard_355.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.59.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "6fe6d3ac0c7cb1cf5b3c475be61d6b38" + }, + { + "dataPath": "params_shard_356.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.59.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "e8a5f0c8297b34a8ae41bb4846ceeb34" + }, + { + "dataPath": "params_shard_357.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.59.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "0a8c025fbd6f9a1b7004f3a340948831" + }, + { + "dataPath": "params_shard_358.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.59.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "0fc3a33c004e6e25a3d55c9548cebf99" + }, + { + "dataPath": "params_shard_359.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.59.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "45a626edd005f4fa5f08828b16dda6d7" + }, + { + "dataPath": "params_shard_360.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.60.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "d42593903c3186aa50e2a8e3eecff67b" + }, + { + "dataPath": "params_shard_361.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.60.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "f41ac05cc4865f2998f13ba540d30eb2" + }, + { + "dataPath": "params_shard_362.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.60.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "33fcb1c3360be2458262a46a465dd4d2" + }, + { + "dataPath": "params_shard_363.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.60.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "d5d87bf6aacdb86ae72c0f3f2ab24136" + }, + { + "dataPath": "params_shard_364.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.60.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "436595d419f45d48a22016897a979a00" + }, + { + "dataPath": "params_shard_365.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.60.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "0dd8c81f34719cea43ccd5eede71862a" + }, + { + "dataPath": "params_shard_366.bin", + "format": "raw-shard", + "nbytes": 462422016, + "records": [ + { + "name": "language_model.model.layers.61.mlp.gate_up_proj.weight", + "shape": [ + 43008, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 462422016, + "byteOffset": 0 + } + ], + "md5sum": "97ad4d9aac8a0250c0b9a1ac8c62031f" + }, + { + "dataPath": "params_shard_367.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.61.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "570f4ea330519b548a04f68c6101daab" + }, + { + "dataPath": "params_shard_368.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.61.self_attn.o_proj.weight", + "shape": [ + 5376, + 4096 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "f5f359deb7a484ea360f7dff5588296e" + }, + { + "dataPath": "params_shard_369.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "language_model.model.layers.61.self_attn.q_proj.weight", + "shape": [ + 4096, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "86f0b1c102ce5850213528066146e448" + }, + { + "dataPath": "params_shard_370.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "language_model.model.layers.61.self_attn.v_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "b1d85991e269b59d4b9c308c4115258d" + }, + { + "dataPath": "params_shard_371.bin", + "format": "raw-shard", + "nbytes": 231211008, + "records": [ + { + "name": "language_model.model.layers.61.mlp.down_proj.weight", + "shape": [ + 5376, + 21504 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 231211008, + "byteOffset": 0 + } + ], + "md5sum": "0c508bb9081f12affe33cccd2d72c033" + }, + { + "dataPath": "params_shard_372.bin", + "format": "raw-shard", + "nbytes": 24729088, + "records": [ + { + "name": "language_model.model.layers.0.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.0.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 10752 + }, + { + "name": "language_model.model.layers.0.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 21504 + }, + { + "name": "language_model.model.layers.0.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 32256 + }, + { + "name": "language_model.model.layers.0.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 43008 + }, + { + "name": "language_model.model.layers.0.self_attn.k_proj.weight", + "shape": [ + 2048, + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 43264 + }, + { + "name": "language_model.model.layers.0.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22063360 + }, + { + "name": "language_model.model.layers.1.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22063616 + }, + { + "name": "language_model.model.layers.1.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22063872 + }, + { + "name": "language_model.model.layers.1.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22064128 + }, + { + "name": "language_model.model.layers.1.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22074880 + }, + { + "name": "language_model.model.layers.1.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22085632 + }, + { + "name": "language_model.model.layers.1.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22096384 + }, + { + "name": "language_model.model.layers.2.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22107136 + }, + { + "name": "language_model.model.layers.2.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22117888 + }, + { + "name": "language_model.model.layers.2.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22128640 + }, + { + "name": "language_model.model.layers.2.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22139392 + }, + { + "name": "language_model.model.layers.2.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22150144 + }, + { + "name": "language_model.model.layers.2.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22150400 + }, + { + "name": "language_model.model.layers.3.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22150656 + }, + { + "name": "language_model.model.layers.3.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22161408 + }, + { + "name": "language_model.model.layers.3.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22172160 + }, + { + "name": "language_model.model.layers.3.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22182912 + }, + { + "name": "language_model.model.layers.3.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22193664 + }, + { + "name": "language_model.model.layers.3.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22193920 + }, + { + "name": "language_model.model.layers.4.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22194176 + }, + { + "name": "language_model.model.layers.4.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22204928 + }, + { + "name": "language_model.model.layers.4.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22215680 + }, + { + "name": "language_model.model.layers.4.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22226432 + }, + { + "name": "language_model.model.layers.4.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22237184 + }, + { + "name": "language_model.model.layers.4.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22237440 + }, + { + "name": "language_model.model.layers.5.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22237696 + }, + { + "name": "language_model.model.layers.5.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22248448 + }, + { + "name": "language_model.model.layers.5.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22259200 + }, + { + "name": "language_model.model.layers.5.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22269952 + }, + { + "name": "language_model.model.layers.5.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22280704 + }, + { + "name": "language_model.model.layers.5.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22280960 + }, + { + "name": "language_model.model.layers.6.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22281216 + }, + { + "name": "language_model.model.layers.6.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22291968 + }, + { + "name": "language_model.model.layers.6.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22302720 + }, + { + "name": "language_model.model.layers.6.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22313472 + }, + { + "name": "language_model.model.layers.6.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22324224 + }, + { + "name": "language_model.model.layers.6.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22324480 + }, + { + "name": "language_model.model.layers.7.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22324736 + }, + { + "name": "language_model.model.layers.7.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22324992 + }, + { + "name": "language_model.model.layers.10.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22325248 + }, + { + "name": "language_model.model.layers.10.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22336000 + }, + { + "name": "language_model.model.layers.10.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22346752 + }, + { + "name": "language_model.model.layers.10.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22357504 + }, + { + "name": "language_model.model.layers.10.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22368256 + }, + { + "name": "language_model.model.layers.10.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22368512 + }, + { + "name": "language_model.model.layers.11.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22368768 + }, + { + "name": "language_model.model.layers.11.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22379520 + }, + { + "name": "language_model.model.layers.11.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22390272 + }, + { + "name": "language_model.model.layers.11.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22401024 + }, + { + "name": "language_model.model.layers.11.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22411776 + }, + { + "name": "language_model.model.layers.11.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22412032 + }, + { + "name": "language_model.model.layers.12.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22412288 + }, + { + "name": "language_model.model.layers.12.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22423040 + }, + { + "name": "language_model.model.layers.12.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22433792 + }, + { + "name": "language_model.model.layers.12.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22444544 + }, + { + "name": "language_model.model.layers.12.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22455296 + }, + { + "name": "language_model.model.layers.12.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22455552 + }, + { + "name": "language_model.model.layers.13.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22455808 + }, + { + "name": "language_model.model.layers.13.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22456064 + }, + { + "name": "language_model.model.layers.7.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22456320 + }, + { + "name": "language_model.model.layers.7.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22467072 + }, + { + "name": "language_model.model.layers.7.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22477824 + }, + { + "name": "language_model.model.layers.7.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22488576 + }, + { + "name": "language_model.model.layers.8.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22499328 + }, + { + "name": "language_model.model.layers.8.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22510080 + }, + { + "name": "language_model.model.layers.8.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22520832 + }, + { + "name": "language_model.model.layers.8.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22531584 + }, + { + "name": "language_model.model.layers.8.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22542336 + }, + { + "name": "language_model.model.layers.8.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22542592 + }, + { + "name": "language_model.model.layers.9.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22542848 + }, + { + "name": "language_model.model.layers.9.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22553600 + }, + { + "name": "language_model.model.layers.9.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22564352 + }, + { + "name": "language_model.model.layers.9.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22575104 + }, + { + "name": "language_model.model.layers.9.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22585856 + }, + { + "name": "language_model.model.layers.9.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22586112 + }, + { + "name": "language_model.model.layers.13.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22586368 + }, + { + "name": "language_model.model.layers.13.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22597120 + }, + { + "name": "language_model.model.layers.13.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22607872 + }, + { + "name": "language_model.model.layers.13.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22618624 + }, + { + "name": "language_model.model.layers.14.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22629376 + }, + { + "name": "language_model.model.layers.14.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22640128 + }, + { + "name": "language_model.model.layers.14.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22650880 + }, + { + "name": "language_model.model.layers.14.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22661632 + }, + { + "name": "language_model.model.layers.14.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22672384 + }, + { + "name": "language_model.model.layers.14.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22672640 + }, + { + "name": "language_model.model.layers.15.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22672896 + }, + { + "name": "language_model.model.layers.15.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22683648 + }, + { + "name": "language_model.model.layers.15.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22694400 + }, + { + "name": "language_model.model.layers.15.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22705152 + }, + { + "name": "language_model.model.layers.15.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22715904 + }, + { + "name": "language_model.model.layers.15.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22716160 + }, + { + "name": "language_model.model.layers.16.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22716416 + }, + { + "name": "language_model.model.layers.16.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22727168 + }, + { + "name": "language_model.model.layers.16.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22737920 + }, + { + "name": "language_model.model.layers.16.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22748672 + }, + { + "name": "language_model.model.layers.16.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22759424 + }, + { + "name": "language_model.model.layers.16.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22759680 + }, + { + "name": "language_model.model.layers.17.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22759936 + }, + { + "name": "language_model.model.layers.17.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22770688 + }, + { + "name": "language_model.model.layers.17.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22781440 + }, + { + "name": "language_model.model.layers.17.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22792192 + }, + { + "name": "language_model.model.layers.17.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22802944 + }, + { + "name": "language_model.model.layers.17.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22803200 + }, + { + "name": "language_model.model.layers.18.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22803456 + }, + { + "name": "language_model.model.layers.18.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22814208 + }, + { + "name": "language_model.model.layers.18.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22824960 + }, + { + "name": "language_model.model.layers.18.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22835712 + }, + { + "name": "language_model.model.layers.18.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22846464 + }, + { + "name": "language_model.model.layers.18.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22846720 + }, + { + "name": "language_model.model.layers.19.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22846976 + }, + { + "name": "language_model.model.layers.19.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22847232 + }, + { + "name": "language_model.model.layers.19.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22847488 + }, + { + "name": "language_model.model.layers.19.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22858240 + }, + { + "name": "language_model.model.layers.19.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22868992 + }, + { + "name": "language_model.model.layers.19.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22879744 + }, + { + "name": "language_model.model.layers.20.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22890496 + }, + { + "name": "language_model.model.layers.20.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22901248 + }, + { + "name": "language_model.model.layers.20.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22912000 + }, + { + "name": "language_model.model.layers.20.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22922752 + }, + { + "name": "language_model.model.layers.20.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22933504 + }, + { + "name": "language_model.model.layers.20.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22933760 + }, + { + "name": "language_model.model.layers.21.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22934016 + }, + { + "name": "language_model.model.layers.21.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22944768 + }, + { + "name": "language_model.model.layers.21.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22955520 + }, + { + "name": "language_model.model.layers.21.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22966272 + }, + { + "name": "language_model.model.layers.21.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22977024 + }, + { + "name": "language_model.model.layers.21.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22977280 + }, + { + "name": "language_model.model.layers.22.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22977536 + }, + { + "name": "language_model.model.layers.22.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22988288 + }, + { + "name": "language_model.model.layers.22.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 22999040 + }, + { + "name": "language_model.model.layers.22.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23009792 + }, + { + "name": "language_model.model.layers.22.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23020544 + }, + { + "name": "language_model.model.layers.22.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23020800 + }, + { + "name": "language_model.model.layers.23.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23021056 + }, + { + "name": "language_model.model.layers.23.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23031808 + }, + { + "name": "language_model.model.layers.23.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23042560 + }, + { + "name": "language_model.model.layers.23.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23053312 + }, + { + "name": "language_model.model.layers.23.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23064064 + }, + { + "name": "language_model.model.layers.23.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23064320 + }, + { + "name": "language_model.model.layers.24.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23064576 + }, + { + "name": "language_model.model.layers.24.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23075328 + }, + { + "name": "language_model.model.layers.24.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23086080 + }, + { + "name": "language_model.model.layers.24.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23096832 + }, + { + "name": "language_model.model.layers.24.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23107584 + }, + { + "name": "language_model.model.layers.24.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23107840 + }, + { + "name": "language_model.model.layers.25.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23108096 + }, + { + "name": "language_model.model.layers.25.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23108352 + }, + { + "name": "language_model.model.layers.25.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23108608 + }, + { + "name": "language_model.model.layers.25.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23119360 + }, + { + "name": "language_model.model.layers.25.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23130112 + }, + { + "name": "language_model.model.layers.25.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23140864 + }, + { + "name": "language_model.model.layers.26.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23151616 + }, + { + "name": "language_model.model.layers.26.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23162368 + }, + { + "name": "language_model.model.layers.26.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23173120 + }, + { + "name": "language_model.model.layers.26.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23183872 + }, + { + "name": "language_model.model.layers.26.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23194624 + }, + { + "name": "language_model.model.layers.26.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23194880 + }, + { + "name": "language_model.model.layers.27.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23195136 + }, + { + "name": "language_model.model.layers.27.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23205888 + }, + { + "name": "language_model.model.layers.27.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23216640 + }, + { + "name": "language_model.model.layers.27.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23227392 + }, + { + "name": "language_model.model.layers.27.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23238144 + }, + { + "name": "language_model.model.layers.27.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23238400 + }, + { + "name": "language_model.model.layers.28.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23238656 + }, + { + "name": "language_model.model.layers.28.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23249408 + }, + { + "name": "language_model.model.layers.28.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23260160 + }, + { + "name": "language_model.model.layers.28.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23270912 + }, + { + "name": "language_model.model.layers.28.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23281664 + }, + { + "name": "language_model.model.layers.28.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23281920 + }, + { + "name": "language_model.model.layers.29.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23282176 + }, + { + "name": "language_model.model.layers.29.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23292928 + }, + { + "name": "language_model.model.layers.29.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23303680 + }, + { + "name": "language_model.model.layers.29.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23314432 + }, + { + "name": "language_model.model.layers.29.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23325184 + }, + { + "name": "language_model.model.layers.29.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23325440 + }, + { + "name": "language_model.model.layers.30.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23325696 + }, + { + "name": "language_model.model.layers.30.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23336448 + }, + { + "name": "language_model.model.layers.30.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23347200 + }, + { + "name": "language_model.model.layers.30.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23357952 + }, + { + "name": "language_model.model.layers.30.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23368704 + }, + { + "name": "language_model.model.layers.30.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23368960 + }, + { + "name": "language_model.model.layers.31.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23369216 + }, + { + "name": "language_model.model.layers.31.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23369472 + }, + { + "name": "language_model.model.layers.31.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23369728 + }, + { + "name": "language_model.model.layers.31.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23380480 + }, + { + "name": "language_model.model.layers.31.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23391232 + }, + { + "name": "language_model.model.layers.31.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23401984 + }, + { + "name": "language_model.model.layers.32.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23412736 + }, + { + "name": "language_model.model.layers.32.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23423488 + }, + { + "name": "language_model.model.layers.32.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23434240 + }, + { + "name": "language_model.model.layers.32.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23444992 + }, + { + "name": "language_model.model.layers.32.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23455744 + }, + { + "name": "language_model.model.layers.32.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23456000 + }, + { + "name": "language_model.model.layers.33.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23456256 + }, + { + "name": "language_model.model.layers.33.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23467008 + }, + { + "name": "language_model.model.layers.33.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23477760 + }, + { + "name": "language_model.model.layers.33.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23488512 + }, + { + "name": "language_model.model.layers.33.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23499264 + }, + { + "name": "language_model.model.layers.33.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23499520 + }, + { + "name": "language_model.model.layers.34.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23499776 + }, + { + "name": "language_model.model.layers.34.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23510528 + }, + { + "name": "language_model.model.layers.34.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23521280 + }, + { + "name": "language_model.model.layers.34.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23532032 + }, + { + "name": "language_model.model.layers.34.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23542784 + }, + { + "name": "language_model.model.layers.34.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23543040 + }, + { + "name": "language_model.model.layers.35.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23543296 + }, + { + "name": "language_model.model.layers.35.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23554048 + }, + { + "name": "language_model.model.layers.35.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23564800 + }, + { + "name": "language_model.model.layers.35.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23575552 + }, + { + "name": "language_model.model.layers.35.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23586304 + }, + { + "name": "language_model.model.layers.35.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23586560 + }, + { + "name": "language_model.model.layers.36.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23586816 + }, + { + "name": "language_model.model.layers.36.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23597568 + }, + { + "name": "language_model.model.layers.36.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23608320 + }, + { + "name": "language_model.model.layers.36.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23619072 + }, + { + "name": "language_model.model.layers.36.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23629824 + }, + { + "name": "language_model.model.layers.36.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23630080 + }, + { + "name": "language_model.model.layers.37.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23630336 + }, + { + "name": "language_model.model.layers.37.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23630592 + }, + { + "name": "language_model.model.layers.37.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23630848 + }, + { + "name": "language_model.model.layers.37.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23641600 + }, + { + "name": "language_model.model.layers.37.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23652352 + }, + { + "name": "language_model.model.layers.37.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23663104 + }, + { + "name": "language_model.model.layers.38.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23673856 + }, + { + "name": "language_model.model.layers.38.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23684608 + }, + { + "name": "language_model.model.layers.38.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23695360 + }, + { + "name": "language_model.model.layers.38.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23706112 + }, + { + "name": "language_model.model.layers.38.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23716864 + }, + { + "name": "language_model.model.layers.38.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23717120 + }, + { + "name": "language_model.model.layers.39.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23717376 + }, + { + "name": "language_model.model.layers.39.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23728128 + }, + { + "name": "language_model.model.layers.39.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23738880 + }, + { + "name": "language_model.model.layers.39.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23749632 + }, + { + "name": "language_model.model.layers.39.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23760384 + }, + { + "name": "language_model.model.layers.39.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23760640 + }, + { + "name": "language_model.model.layers.40.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23760896 + }, + { + "name": "language_model.model.layers.40.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23771648 + }, + { + "name": "language_model.model.layers.40.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23782400 + }, + { + "name": "language_model.model.layers.40.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23793152 + }, + { + "name": "language_model.model.layers.40.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23803904 + }, + { + "name": "language_model.model.layers.40.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23804160 + }, + { + "name": "language_model.model.layers.41.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23804416 + }, + { + "name": "language_model.model.layers.41.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23815168 + }, + { + "name": "language_model.model.layers.41.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23825920 + }, + { + "name": "language_model.model.layers.41.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23836672 + }, + { + "name": "language_model.model.layers.41.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23847424 + }, + { + "name": "language_model.model.layers.41.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23847680 + }, + { + "name": "language_model.model.layers.42.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23847936 + }, + { + "name": "language_model.model.layers.42.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23858688 + }, + { + "name": "language_model.model.layers.42.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23869440 + }, + { + "name": "language_model.model.layers.42.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23880192 + }, + { + "name": "language_model.model.layers.42.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23890944 + }, + { + "name": "language_model.model.layers.42.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23891200 + }, + { + "name": "language_model.model.layers.43.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23891456 + }, + { + "name": "language_model.model.layers.43.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23891712 + }, + { + "name": "language_model.model.layers.43.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23891968 + }, + { + "name": "language_model.model.layers.43.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23902720 + }, + { + "name": "language_model.model.layers.43.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23913472 + }, + { + "name": "language_model.model.layers.43.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23924224 + }, + { + "name": "language_model.model.layers.44.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23934976 + }, + { + "name": "language_model.model.layers.44.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23945728 + }, + { + "name": "language_model.model.layers.44.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23956480 + }, + { + "name": "language_model.model.layers.44.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23967232 + }, + { + "name": "language_model.model.layers.44.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23977984 + }, + { + "name": "language_model.model.layers.44.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23978240 + }, + { + "name": "language_model.model.layers.45.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23978496 + }, + { + "name": "language_model.model.layers.45.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 23989248 + }, + { + "name": "language_model.model.layers.45.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24000000 + }, + { + "name": "language_model.model.layers.45.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24010752 + }, + { + "name": "language_model.model.layers.45.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24021504 + }, + { + "name": "language_model.model.layers.45.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24021760 + }, + { + "name": "language_model.model.layers.46.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24022016 + }, + { + "name": "language_model.model.layers.46.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24032768 + }, + { + "name": "language_model.model.layers.46.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24043520 + }, + { + "name": "language_model.model.layers.46.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24054272 + }, + { + "name": "language_model.model.layers.46.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24065024 + }, + { + "name": "language_model.model.layers.46.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24065280 + }, + { + "name": "language_model.model.layers.47.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24065536 + }, + { + "name": "language_model.model.layers.47.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24076288 + }, + { + "name": "language_model.model.layers.47.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24087040 + }, + { + "name": "language_model.model.layers.47.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24097792 + }, + { + "name": "language_model.model.layers.47.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24108544 + }, + { + "name": "language_model.model.layers.47.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24108800 + }, + { + "name": "language_model.model.layers.48.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24109056 + }, + { + "name": "language_model.model.layers.48.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24119808 + }, + { + "name": "language_model.model.layers.48.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24130560 + }, + { + "name": "language_model.model.layers.48.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24141312 + }, + { + "name": "language_model.model.layers.48.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24152064 + }, + { + "name": "language_model.model.layers.48.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24152320 + }, + { + "name": "language_model.model.layers.49.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24152576 + }, + { + "name": "language_model.model.layers.49.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24152832 + }, + { + "name": "language_model.model.layers.49.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24153088 + }, + { + "name": "language_model.model.layers.49.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24163840 + }, + { + "name": "language_model.model.layers.49.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24174592 + }, + { + "name": "language_model.model.layers.49.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24185344 + }, + { + "name": "language_model.model.layers.50.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24196096 + }, + { + "name": "language_model.model.layers.50.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24206848 + }, + { + "name": "language_model.model.layers.50.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24217600 + }, + { + "name": "language_model.model.layers.50.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24228352 + }, + { + "name": "language_model.model.layers.50.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24239104 + }, + { + "name": "language_model.model.layers.50.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24239360 + }, + { + "name": "language_model.model.layers.51.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24239616 + }, + { + "name": "language_model.model.layers.51.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24250368 + }, + { + "name": "language_model.model.layers.51.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24261120 + }, + { + "name": "language_model.model.layers.51.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24271872 + }, + { + "name": "language_model.model.layers.51.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24282624 + }, + { + "name": "language_model.model.layers.51.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24282880 + }, + { + "name": "language_model.model.layers.52.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24283136 + }, + { + "name": "language_model.model.layers.52.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24293888 + }, + { + "name": "language_model.model.layers.52.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24304640 + }, + { + "name": "language_model.model.layers.52.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24315392 + }, + { + "name": "language_model.model.layers.52.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24326144 + }, + { + "name": "language_model.model.layers.52.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24326400 + }, + { + "name": "language_model.model.layers.53.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24326656 + }, + { + "name": "language_model.model.layers.53.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24337408 + }, + { + "name": "language_model.model.layers.53.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24348160 + }, + { + "name": "language_model.model.layers.53.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24358912 + }, + { + "name": "language_model.model.layers.53.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24369664 + }, + { + "name": "language_model.model.layers.53.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24369920 + }, + { + "name": "language_model.model.layers.54.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24370176 + }, + { + "name": "language_model.model.layers.54.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24380928 + }, + { + "name": "language_model.model.layers.54.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24391680 + }, + { + "name": "language_model.model.layers.54.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24402432 + }, + { + "name": "language_model.model.layers.54.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24413184 + }, + { + "name": "language_model.model.layers.54.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24413440 + }, + { + "name": "language_model.model.layers.55.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24413696 + }, + { + "name": "language_model.model.layers.55.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24413952 + }, + { + "name": "language_model.model.layers.55.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24414208 + }, + { + "name": "language_model.model.layers.55.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24424960 + }, + { + "name": "language_model.model.layers.55.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24435712 + }, + { + "name": "language_model.model.layers.55.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24446464 + }, + { + "name": "language_model.model.layers.56.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24457216 + }, + { + "name": "language_model.model.layers.56.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24467968 + }, + { + "name": "language_model.model.layers.56.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24478720 + }, + { + "name": "language_model.model.layers.56.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24489472 + }, + { + "name": "language_model.model.layers.56.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24500224 + }, + { + "name": "language_model.model.layers.56.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24500480 + }, + { + "name": "language_model.model.layers.57.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24500736 + }, + { + "name": "language_model.model.layers.57.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24511488 + }, + { + "name": "language_model.model.layers.57.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24522240 + }, + { + "name": "language_model.model.layers.57.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24532992 + }, + { + "name": "language_model.model.layers.57.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24543744 + }, + { + "name": "language_model.model.layers.57.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24544000 + }, + { + "name": "language_model.model.layers.58.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24544256 + }, + { + "name": "language_model.model.layers.58.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24555008 + }, + { + "name": "language_model.model.layers.58.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24565760 + }, + { + "name": "language_model.model.layers.58.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24576512 + }, + { + "name": "language_model.model.layers.58.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24587264 + }, + { + "name": "language_model.model.layers.58.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24587520 + }, + { + "name": "language_model.model.layers.59.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24587776 + }, + { + "name": "language_model.model.layers.59.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24598528 + }, + { + "name": "language_model.model.layers.59.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24609280 + }, + { + "name": "language_model.model.layers.59.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24620032 + }, + { + "name": "language_model.model.layers.59.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24630784 + }, + { + "name": "language_model.model.layers.59.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24631040 + }, + { + "name": "language_model.model.layers.60.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24631296 + }, + { + "name": "language_model.model.layers.60.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24642048 + }, + { + "name": "language_model.model.layers.60.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24652800 + }, + { + "name": "language_model.model.layers.60.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24663552 + }, + { + "name": "language_model.model.layers.60.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24674304 + }, + { + "name": "language_model.model.layers.60.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24674560 + }, + { + "name": "language_model.model.layers.61.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24674816 + }, + { + "name": "language_model.model.layers.61.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24675072 + }, + { + "name": "language_model.model.layers.61.input_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24675328 + }, + { + "name": "language_model.model.layers.61.post_attention_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24686080 + }, + { + "name": "language_model.model.layers.61.post_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24696832 + }, + { + "name": "language_model.model.layers.61.pre_feedforward_layernorm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24707584 + }, + { + "name": "language_model.model.norm.weight", + "shape": [ + 5376 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 10752, + "byteOffset": 24718336 + } + ], + "md5sum": "b55393954468d05176fc671a577cbf72" + } + ] +} \ No newline at end of file