| { | |
| "metadata": { | |
| "ParamSize": 245, | |
| "ParamBytes": 5673082880.0, | |
| "BitsPerParam": 32.0 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 209715200, | |
| "records": [ | |
| { | |
| "name": "transformer.embd.weight", | |
| "shape": [ | |
| 51200, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 209715200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a8605021ea053754a003da8f06fbc97d" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25186304, | |
| "records": [ | |
| { | |
| "name": "transformer.h.0.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.0.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 4096 | |
| }, | |
| { | |
| "name": "transformer.h.0.mixer.Wqkv.weight", | |
| "shape": [ | |
| 6144, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 8192 | |
| }, | |
| { | |
| "name": "transformer.h.0.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 25174016 | |
| } | |
| ], | |
| "md5sum": "67ccf81e4bd8f1e274cd98665830a10c" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.0.mlp.fc1.weight", | |
| "shape": [ | |
| 8192, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e076ced0c11b935afabbba1275aa7aee" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.0.mlp.fc2.weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "198a6696ab66cfc44e6de5ca933714a0" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.1.mixer.Wqkv.weight", | |
| "shape": [ | |
| 6144, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a42e1763dc3b7270c58676580dd1b6ef" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.1.mlp.fc1.weight", | |
| "shape": [ | |
| 8192, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2a166ab4945052ec0e29831b6a359eda" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.1.mlp.fc2.weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "17a86d7400a79bc95cf1e9ffe6d4d731" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.mixer.Wqkv.weight", | |
| "shape": [ | |
| 6144, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6f63c27ca3c8968b879f35738d05b4b6" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.mlp.fc1.weight", | |
| "shape": [ | |
| 8192, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "42c6a506ee2b498aa4785c39c083ecb5" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.mlp.fc2.weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "405d421b4f39a5e1d9b0cd86d00d004c" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.3.mixer.Wqkv.weight", | |
| "shape": [ | |
| 6144, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "13e8da8950bd42c86f47e5661c533733" | |
| }, | |
| { | |
| "dataPath": "params_shard_11.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25300992, | |
| "records": [ | |
| { | |
| "name": "transformer.h.0.mixer.out_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.0.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 8392704 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8409088 | |
| }, | |
| { | |
| "name": "transformer.h.1.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8413184 | |
| }, | |
| { | |
| "name": "transformer.h.1.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8417280 | |
| }, | |
| { | |
| "name": "transformer.h.1.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 8421376 | |
| }, | |
| { | |
| "name": "transformer.h.1.mixer.out_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 8433664 | |
| }, | |
| { | |
| "name": "transformer.h.1.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16822272 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 16826368 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16842752 | |
| }, | |
| { | |
| "name": "transformer.h.2.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16846848 | |
| }, | |
| { | |
| "name": "transformer.h.2.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16850944 | |
| }, | |
| { | |
| "name": "transformer.h.2.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16855040 | |
| }, | |
| { | |
| "name": "transformer.h.2.mixer.out_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 16867328 | |
| }, | |
| { | |
| "name": "transformer.h.2.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25255936 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 25260032 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25276416 | |
| }, | |
| { | |
| "name": "transformer.h.3.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25280512 | |
| }, | |
| { | |
| "name": "transformer.h.3.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25284608 | |
| }, | |
| { | |
| "name": "transformer.h.3.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 25288704 | |
| } | |
| ], | |
| "md5sum": "441d71a59fbda076c591f71a0e1c0341" | |
| }, | |
| { | |
| "dataPath": "params_shard_12.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.3.mlp.fc1.weight", | |
| "shape": [ | |
| 8192, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d1bc93af8197956ea694b53bbbc89327" | |
| }, | |
| { | |
| "dataPath": "params_shard_13.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.3.mlp.fc2.weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "925af2d99c71e060c03fd75185b1f497" | |
| }, | |
| { | |
| "dataPath": "params_shard_14.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.4.mixer.Wqkv.weight", | |
| "shape": [ | |
| 6144, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bef40ffab99d9a572e2eacf9587b7cde" | |
| }, | |
| { | |
| "dataPath": "params_shard_15.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.4.mlp.fc1.weight", | |
| "shape": [ | |
| 8192, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "02e3a1ccebee6388c275db73dbf80ccd" | |
| }, | |
| { | |
| "dataPath": "params_shard_16.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.4.mlp.fc2.weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b5d7ca5772e5a0cdf368c2e97fdaf1da" | |
| }, | |
| { | |
| "dataPath": "params_shard_17.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.5.mixer.Wqkv.weight", | |
| "shape": [ | |
| 6144, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "016bacfca2f07ba2ce7eae7994892885" | |
| }, | |
| { | |
| "dataPath": "params_shard_18.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.5.mlp.fc1.weight", | |
| "shape": [ | |
| 8192, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5f2dde01733bdfc9c4dee4a748f5a0d5" | |
| }, | |
| { | |
| "dataPath": "params_shard_19.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.5.mlp.fc2.weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "be3112ade0c6036995022be14d66df62" | |
| }, | |
| { | |
| "dataPath": "params_shard_20.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.6.mixer.Wqkv.weight", | |
| "shape": [ | |
| 6144, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f2d5888e1d00543276a2111e4070ad9d" | |
| }, | |
| { | |
| "dataPath": "params_shard_21.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25300992, | |
| "records": [ | |
| { | |
| "name": "transformer.h.3.mixer.out_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.3.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 8392704 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8409088 | |
| }, | |
| { | |
| "name": "transformer.h.4.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8413184 | |
| }, | |
| { | |
| "name": "transformer.h.4.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8417280 | |
| }, | |
| { | |
| "name": "transformer.h.4.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 8421376 | |
| }, | |
| { | |
| "name": "transformer.h.4.mixer.out_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 8433664 | |
| }, | |
| { | |
| "name": "transformer.h.4.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16822272 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 16826368 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16842752 | |
| }, | |
| { | |
| "name": "transformer.h.5.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16846848 | |
| }, | |
| { | |
| "name": "transformer.h.5.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16850944 | |
| }, | |
| { | |
| "name": "transformer.h.5.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16855040 | |
| }, | |
| { | |
| "name": "transformer.h.5.mixer.out_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 16867328 | |
| }, | |
| { | |
| "name": "transformer.h.5.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25255936 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 25260032 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25276416 | |
| }, | |
| { | |
| "name": "transformer.h.6.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25280512 | |
| }, | |
| { | |
| "name": "transformer.h.6.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25284608 | |
| }, | |
| { | |
| "name": "transformer.h.6.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 25288704 | |
| } | |
| ], | |
| "md5sum": "f54084a552b984c57670e88319c99771" | |
| }, | |
| { | |
| "dataPath": "params_shard_22.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.6.mlp.fc1.weight", | |
| "shape": [ | |
| 8192, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bd13cb4ffcdafbc8a8c3ec1bc26ffbaf" | |
| }, | |
| { | |
| "dataPath": "params_shard_23.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.6.mlp.fc2.weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "afd5111978ff8a2ead2c1c5e859431f4" | |
| }, | |
| { | |
| "dataPath": "params_shard_24.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.7.mixer.Wqkv.weight", | |
| "shape": [ | |
| 6144, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c6033c3bf1cc8161c0a24e3b7565e1ef" | |
| }, | |
| { | |
| "dataPath": "params_shard_25.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.7.mlp.fc1.weight", | |
| "shape": [ | |
| 8192, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "78bfee79747e8f2880fefaca8e28e590" | |
| }, | |
| { | |
| "dataPath": "params_shard_26.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.7.mlp.fc2.weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7a0db5ea5b8d44adec0d55b846352a55" | |
| }, | |
| { | |
| "dataPath": "params_shard_27.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.8.mixer.Wqkv.weight", | |
| "shape": [ | |
| 6144, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1bf2abff697e6cee5082d2698d63b91b" | |
| }, | |
| { | |
| "dataPath": "params_shard_28.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.8.mlp.fc1.weight", | |
| "shape": [ | |
| 8192, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "87f8eded440503b68b96ce961c60b8d3" | |
| }, | |
| { | |
| "dataPath": "params_shard_29.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.8.mlp.fc2.weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "135bc520b75aa6bf960914a4065ee855" | |
| }, | |
| { | |
| "dataPath": "params_shard_30.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.9.mixer.Wqkv.weight", | |
| "shape": [ | |
| 6144, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5e0a304a5407da6b6573bfcf6aff681c" | |
| }, | |
| { | |
| "dataPath": "params_shard_31.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25300992, | |
| "records": [ | |
| { | |
| "name": "transformer.h.6.mixer.out_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.6.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 8392704 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8409088 | |
| }, | |
| { | |
| "name": "transformer.h.7.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8413184 | |
| }, | |
| { | |
| "name": "transformer.h.7.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8417280 | |
| }, | |
| { | |
| "name": "transformer.h.7.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 8421376 | |
| }, | |
| { | |
| "name": "transformer.h.7.mixer.out_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 8433664 | |
| }, | |
| { | |
| "name": "transformer.h.7.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16822272 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 16826368 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16842752 | |
| }, | |
| { | |
| "name": "transformer.h.8.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16846848 | |
| }, | |
| { | |
| "name": "transformer.h.8.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16850944 | |
| }, | |
| { | |
| "name": "transformer.h.8.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16855040 | |
| }, | |
| { | |
| "name": "transformer.h.8.mixer.out_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 16867328 | |
| }, | |
| { | |
| "name": "transformer.h.8.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25255936 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 25260032 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25276416 | |
| }, | |
| { | |
| "name": "transformer.h.9.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25280512 | |
| }, | |
| { | |
| "name": "transformer.h.9.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25284608 | |
| }, | |
| { | |
| "name": "transformer.h.9.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 25288704 | |
| } | |
| ], | |
| "md5sum": "12e43363710fe56560b9be1e4dd6a4f1" | |
| }, | |
| { | |
| "dataPath": "params_shard_32.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.9.mlp.fc1.weight", | |
| "shape": [ | |
| 8192, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2f9f098ffe6ca3e9d853a1ae53ee4151" | |
| }, | |
| { | |
| "dataPath": "params_shard_33.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.9.mlp.fc2.weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e7f8c47a549d5e01028890dcc9eb8ba9" | |
| }, | |
| { | |
| "dataPath": "params_shard_34.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.10.mixer.Wqkv.weight", | |
| "shape": [ | |
| 6144, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d50102e665e1d8c4b0094615b04b45ac" | |
| }, | |
| { | |
| "dataPath": "params_shard_35.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.10.mlp.fc1.weight", | |
| "shape": [ | |
| 8192, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6a9e62f01eeae00ed53a93107dd8a9c5" | |
| }, | |
| { | |
| "dataPath": "params_shard_36.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.10.mlp.fc2.weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2033ed998e461a7ec8fa4e5df62a6c02" | |
| }, | |
| { | |
| "dataPath": "params_shard_37.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.11.mixer.Wqkv.weight", | |
| "shape": [ | |
| 6144, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "440f10657fee337289c0343622595462" | |
| }, | |
| { | |
| "dataPath": "params_shard_38.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.11.mlp.fc1.weight", | |
| "shape": [ | |
| 8192, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2df3ebab92cc0570ee88d456dc5b4cb7" | |
| }, | |
| { | |
| "dataPath": "params_shard_39.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.11.mlp.fc2.weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "250a12796af0e76d01461b073fd3afd2" | |
| }, | |
| { | |
| "dataPath": "params_shard_40.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.12.mixer.Wqkv.weight", | |
| "shape": [ | |
| 6144, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "10bc62ad9d221af31a6ddf798ba44ef4" | |
| }, | |
| { | |
| "dataPath": "params_shard_41.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25300992, | |
| "records": [ | |
| { | |
| "name": "transformer.h.9.mixer.out_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.9.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 8392704 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8409088 | |
| }, | |
| { | |
| "name": "transformer.h.10.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8413184 | |
| }, | |
| { | |
| "name": "transformer.h.10.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8417280 | |
| }, | |
| { | |
| "name": "transformer.h.10.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 8421376 | |
| }, | |
| { | |
| "name": "transformer.h.10.mixer.out_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 8433664 | |
| }, | |
| { | |
| "name": "transformer.h.10.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16822272 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 16826368 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16842752 | |
| }, | |
| { | |
| "name": "transformer.h.11.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16846848 | |
| }, | |
| { | |
| "name": "transformer.h.11.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16850944 | |
| }, | |
| { | |
| "name": "transformer.h.11.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16855040 | |
| }, | |
| { | |
| "name": "transformer.h.11.mixer.out_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 16867328 | |
| }, | |
| { | |
| "name": "transformer.h.11.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25255936 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 25260032 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25276416 | |
| }, | |
| { | |
| "name": "transformer.h.12.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25280512 | |
| }, | |
| { | |
| "name": "transformer.h.12.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25284608 | |
| }, | |
| { | |
| "name": "transformer.h.12.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 25288704 | |
| } | |
| ], | |
| "md5sum": "82bb22384077785e0eb8e0a2309454a2" | |
| }, | |
| { | |
| "dataPath": "params_shard_42.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.12.mlp.fc1.weight", | |
| "shape": [ | |
| 8192, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cb7e65ae8e7fd6a12eac45eae124ea56" | |
| }, | |
| { | |
| "dataPath": "params_shard_43.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.12.mlp.fc2.weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "07de806a9ab6d49f16208a649dac1df6" | |
| }, | |
| { | |
| "dataPath": "params_shard_44.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.13.mixer.Wqkv.weight", | |
| "shape": [ | |
| 6144, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3115476f8181c9d5cd4277a1a7118cd2" | |
| }, | |
| { | |
| "dataPath": "params_shard_45.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.13.mlp.fc1.weight", | |
| "shape": [ | |
| 8192, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e0cfc90e608da3cb5f6b167c7a65fc01" | |
| }, | |
| { | |
| "dataPath": "params_shard_46.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.13.mlp.fc2.weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "743333866391d30086fc0412c7974e48" | |
| }, | |
| { | |
| "dataPath": "params_shard_47.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.14.mixer.Wqkv.weight", | |
| "shape": [ | |
| 6144, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6dd8f20986ccc6c22d5d2de9b3af2171" | |
| }, | |
| { | |
| "dataPath": "params_shard_48.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.14.mlp.fc1.weight", | |
| "shape": [ | |
| 8192, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "80f482a951f4542f92154d58cd6ee250" | |
| }, | |
| { | |
| "dataPath": "params_shard_49.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.14.mlp.fc2.weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "546c5882774c0f0d72aa31274e68a678" | |
| }, | |
| { | |
| "dataPath": "params_shard_50.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.15.mixer.Wqkv.weight", | |
| "shape": [ | |
| 6144, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a3519b85eff52ce45b8bc1c89277a795" | |
| }, | |
| { | |
| "dataPath": "params_shard_51.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25300992, | |
| "records": [ | |
| { | |
| "name": "transformer.h.12.mixer.out_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.12.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 8392704 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8409088 | |
| }, | |
| { | |
| "name": "transformer.h.13.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8413184 | |
| }, | |
| { | |
| "name": "transformer.h.13.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8417280 | |
| }, | |
| { | |
| "name": "transformer.h.13.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 8421376 | |
| }, | |
| { | |
| "name": "transformer.h.13.mixer.out_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 8433664 | |
| }, | |
| { | |
| "name": "transformer.h.13.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16822272 | |
| }, | |
| { | |
| "name": "transformer.h.13.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 16826368 | |
| }, | |
| { | |
| "name": "transformer.h.13.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16842752 | |
| }, | |
| { | |
| "name": "transformer.h.14.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16846848 | |
| }, | |
| { | |
| "name": "transformer.h.14.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16850944 | |
| }, | |
| { | |
| "name": "transformer.h.14.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16855040 | |
| }, | |
| { | |
| "name": "transformer.h.14.mixer.out_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 16867328 | |
| }, | |
| { | |
| "name": "transformer.h.14.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25255936 | |
| }, | |
| { | |
| "name": "transformer.h.14.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 25260032 | |
| }, | |
| { | |
| "name": "transformer.h.14.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25276416 | |
| }, | |
| { | |
| "name": "transformer.h.15.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25280512 | |
| }, | |
| { | |
| "name": "transformer.h.15.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25284608 | |
| }, | |
| { | |
| "name": "transformer.h.15.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 25288704 | |
| } | |
| ], | |
| "md5sum": "2b0d035e10be953ad8acb4477ac954b8" | |
| }, | |
| { | |
| "dataPath": "params_shard_52.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.15.mlp.fc1.weight", | |
| "shape": [ | |
| 8192, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "36124fd7674b8054eac914c55267cea2" | |
| }, | |
| { | |
| "dataPath": "params_shard_53.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.15.mlp.fc2.weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "eb9a9f7aa1a2da8c6a4a8ff003dd435b" | |
| }, | |
| { | |
| "dataPath": "params_shard_54.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.16.mixer.Wqkv.weight", | |
| "shape": [ | |
| 6144, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7f9c42499cb13bbd3a310ea96e9064b3" | |
| }, | |
| { | |
| "dataPath": "params_shard_55.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.16.mlp.fc1.weight", | |
| "shape": [ | |
| 8192, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a0b2478ffd2fbe50d8d67271a1935e63" | |
| }, | |
| { | |
| "dataPath": "params_shard_56.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.16.mlp.fc2.weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6da03f35fe9e01208df697422aca69fd" | |
| }, | |
| { | |
| "dataPath": "params_shard_57.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.17.mixer.Wqkv.weight", | |
| "shape": [ | |
| 6144, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "22df39c78cbf0398b68f0ded35cc032a" | |
| }, | |
| { | |
| "dataPath": "params_shard_58.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.17.mlp.fc1.weight", | |
| "shape": [ | |
| 8192, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "68b2acafe9398e0336e8ddd4ea783809" | |
| }, | |
| { | |
| "dataPath": "params_shard_59.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.17.mlp.fc2.weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d9240b93257a5354fc2a02f3e98bd6c3" | |
| }, | |
| { | |
| "dataPath": "params_shard_60.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.18.mixer.Wqkv.weight", | |
| "shape": [ | |
| 6144, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d541e5db82d9f19e5734e14afe10331d" | |
| }, | |
| { | |
| "dataPath": "params_shard_61.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25300992, | |
| "records": [ | |
| { | |
| "name": "transformer.h.15.mixer.out_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.15.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 8392704 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8409088 | |
| }, | |
| { | |
| "name": "transformer.h.16.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8413184 | |
| }, | |
| { | |
| "name": "transformer.h.16.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8417280 | |
| }, | |
| { | |
| "name": "transformer.h.16.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 8421376 | |
| }, | |
| { | |
| "name": "transformer.h.16.mixer.out_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 8433664 | |
| }, | |
| { | |
| "name": "transformer.h.16.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16822272 | |
| }, | |
| { | |
| "name": "transformer.h.16.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 16826368 | |
| }, | |
| { | |
| "name": "transformer.h.16.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16842752 | |
| }, | |
| { | |
| "name": "transformer.h.17.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16846848 | |
| }, | |
| { | |
| "name": "transformer.h.17.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16850944 | |
| }, | |
| { | |
| "name": "transformer.h.17.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16855040 | |
| }, | |
| { | |
| "name": "transformer.h.17.mixer.out_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 16867328 | |
| }, | |
| { | |
| "name": "transformer.h.17.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25255936 | |
| }, | |
| { | |
| "name": "transformer.h.17.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 25260032 | |
| }, | |
| { | |
| "name": "transformer.h.17.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25276416 | |
| }, | |
| { | |
| "name": "transformer.h.18.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25280512 | |
| }, | |
| { | |
| "name": "transformer.h.18.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25284608 | |
| }, | |
| { | |
| "name": "transformer.h.18.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 25288704 | |
| } | |
| ], | |
| "md5sum": "04f67f56ff47ef38c4980a7941ebd71c" | |
| }, | |
| { | |
| "dataPath": "params_shard_62.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.18.mlp.fc1.weight", | |
| "shape": [ | |
| 8192, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "edeed64f99b4db0b91c2ca73906eeefa" | |
| }, | |
| { | |
| "dataPath": "params_shard_63.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.18.mlp.fc2.weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "06ea8110c08fc61ac217f0ca00af3f11" | |
| }, | |
| { | |
| "dataPath": "params_shard_64.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.19.mixer.Wqkv.weight", | |
| "shape": [ | |
| 6144, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "993ce6b239a07be350df994ad0f5f55a" | |
| }, | |
| { | |
| "dataPath": "params_shard_65.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.19.mlp.fc1.weight", | |
| "shape": [ | |
| 8192, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "56d8bd166f3938d314fb357ba5b6af29" | |
| }, | |
| { | |
| "dataPath": "params_shard_66.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.19.mlp.fc2.weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a1953efdb537fed81ad3ee64813c8af6" | |
| }, | |
| { | |
| "dataPath": "params_shard_67.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.20.mixer.Wqkv.weight", | |
| "shape": [ | |
| 6144, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "91627af652fdfd177f3ffc862f9b85fd" | |
| }, | |
| { | |
| "dataPath": "params_shard_68.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.20.mlp.fc1.weight", | |
| "shape": [ | |
| 8192, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6e3ced2a50b0cc260d02e86a4aae234d" | |
| }, | |
| { | |
| "dataPath": "params_shard_69.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.20.mlp.fc2.weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f47401d11940ed82b90157a561fc3725" | |
| }, | |
| { | |
| "dataPath": "params_shard_70.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.21.mixer.Wqkv.weight", | |
| "shape": [ | |
| 6144, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "263ad0599fdc7e6a1207447cf819f232" | |
| }, | |
| { | |
| "dataPath": "params_shard_71.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25300992, | |
| "records": [ | |
| { | |
| "name": "transformer.h.18.mixer.out_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.18.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 8392704 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8409088 | |
| }, | |
| { | |
| "name": "transformer.h.19.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8413184 | |
| }, | |
| { | |
| "name": "transformer.h.19.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8417280 | |
| }, | |
| { | |
| "name": "transformer.h.19.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 8421376 | |
| }, | |
| { | |
| "name": "transformer.h.19.mixer.out_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 8433664 | |
| }, | |
| { | |
| "name": "transformer.h.19.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16822272 | |
| }, | |
| { | |
| "name": "transformer.h.19.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 16826368 | |
| }, | |
| { | |
| "name": "transformer.h.19.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16842752 | |
| }, | |
| { | |
| "name": "transformer.h.20.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16846848 | |
| }, | |
| { | |
| "name": "transformer.h.20.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16850944 | |
| }, | |
| { | |
| "name": "transformer.h.20.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16855040 | |
| }, | |
| { | |
| "name": "transformer.h.20.mixer.out_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 16867328 | |
| }, | |
| { | |
| "name": "transformer.h.20.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25255936 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 25260032 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25276416 | |
| }, | |
| { | |
| "name": "transformer.h.21.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25280512 | |
| }, | |
| { | |
| "name": "transformer.h.21.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25284608 | |
| }, | |
| { | |
| "name": "transformer.h.21.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 25288704 | |
| } | |
| ], | |
| "md5sum": "97afffba16275ee5e5d147fa1c788d18" | |
| }, | |
| { | |
| "dataPath": "params_shard_72.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.21.mlp.fc1.weight", | |
| "shape": [ | |
| 8192, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2a430f95773ba9d74b2bf46981956085" | |
| }, | |
| { | |
| "dataPath": "params_shard_73.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.21.mlp.fc2.weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8ae40c479c8f397c8e71b1886c095bf5" | |
| }, | |
| { | |
| "dataPath": "params_shard_74.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.22.mixer.Wqkv.weight", | |
| "shape": [ | |
| 6144, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "68a29572f1f4b30a706781a1de068c25" | |
| }, | |
| { | |
| "dataPath": "params_shard_75.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.22.mlp.fc1.weight", | |
| "shape": [ | |
| 8192, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1e7e0e836e24c1b866241b95cfa51fc8" | |
| }, | |
| { | |
| "dataPath": "params_shard_76.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.22.mlp.fc2.weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a3c5ae69e214eb5968920a699625b8a0" | |
| }, | |
| { | |
| "dataPath": "params_shard_77.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.23.mixer.Wqkv.weight", | |
| "shape": [ | |
| 6144, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "969f0f260ff937665b48356e86d551b8" | |
| }, | |
| { | |
| "dataPath": "params_shard_78.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.23.mlp.fc1.weight", | |
| "shape": [ | |
| 8192, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "58deb32cec3dc98c9d3dbdc08819de78" | |
| }, | |
| { | |
| "dataPath": "params_shard_79.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "transformer.h.23.mlp.fc2.weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4facc13015040b6e09345b47bdcd8d80" | |
| }, | |
| { | |
| "dataPath": "params_shard_80.bin", | |
| "format": "raw-shard", | |
| "nbytes": 209715200, | |
| "records": [ | |
| { | |
| "name": "lm_head.linear.weight", | |
| "shape": [ | |
| 51200, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 209715200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f3be3e3e8addb4c825a8151cf42860a3" | |
| }, | |
| { | |
| "dataPath": "params_shard_81.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25391104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.21.mixer.out_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.21.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 8392704 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8409088 | |
| }, | |
| { | |
| "name": "transformer.h.22.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8413184 | |
| }, | |
| { | |
| "name": "transformer.h.22.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8417280 | |
| }, | |
| { | |
| "name": "transformer.h.22.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 8421376 | |
| }, | |
| { | |
| "name": "transformer.h.22.mixer.out_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 8433664 | |
| }, | |
| { | |
| "name": "transformer.h.22.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16822272 | |
| }, | |
| { | |
| "name": "transformer.h.22.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 16826368 | |
| }, | |
| { | |
| "name": "transformer.h.22.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16842752 | |
| }, | |
| { | |
| "name": "transformer.h.23.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16846848 | |
| }, | |
| { | |
| "name": "transformer.h.23.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16850944 | |
| }, | |
| { | |
| "name": "transformer.h.23.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16855040 | |
| }, | |
| { | |
| "name": "transformer.h.23.mixer.out_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 16867328 | |
| }, | |
| { | |
| "name": "transformer.h.23.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25255936 | |
| }, | |
| { | |
| "name": "transformer.h.23.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 25260032 | |
| }, | |
| { | |
| "name": "transformer.h.23.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25276416 | |
| }, | |
| { | |
| "name": "lm_head.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25280512 | |
| }, | |
| { | |
| "name": "lm_head.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25284608 | |
| }, | |
| { | |
| "name": "lm_head.linear.bias", | |
| "shape": [ | |
| 51200 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 102400, | |
| "byteOffset": 25288704 | |
| } | |
| ], | |
| "md5sum": "119a470d69bbc09cc49391e3cfcf5c9f" | |
| } | |
| ] | |
| } |