| { | |
| "metadata": { | |
| "ParamSize": 198, | |
| "ParamBytes": 3087428608.0, | |
| "BitsPerParam": 16.0 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 466747392, | |
| "records": [ | |
| { | |
| "name": "model.embed_tokens.weight", | |
| "shape": [ | |
| 151936, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 466747392, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9c394790c22960ff045f6cecf24f2478" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 55050240, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 17920, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55050240, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2b2470b4a9e72adb478d1631f7405cf1" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27535360, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.input_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.down_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 8960 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27525120, | |
| "byteOffset": 3072 | |
| }, | |
| { | |
| "name": "model.layers.0.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 27528192 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.c_attn.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 27531264 | |
| } | |
| ], | |
| "md5sum": "dc9e08ac4db31734a1b603ee26599517" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27525120, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.down_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 8960 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27525120, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "49563c8f4aa9a7eb793010398bfda240" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 55050240, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 17920, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55050240, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "51521a14a5183f53e7597fb1b0c1ac02" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27525120, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.down_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 8960 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27525120, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3c9458be61e6f6d2f09bba659866fdc5" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 55050240, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 17920, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55050240, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e09a65b0e17d3503c7fc7a1ca671a5ba" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27525120, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.down_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 8960 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27525120, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "672289b109ad0020f17e9f6844d72eb0" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 55050240, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 17920, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55050240, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1468ccdaed4ba067e4867957c993c049" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33060864, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.self_attn.c_attn.weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 6291456 | |
| }, | |
| { | |
| "name": "model.layers.1.input_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.1.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 11013120 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.c_attn.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11016192 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.c_attn.weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 11020288 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17311744 | |
| }, | |
| { | |
| "name": "model.layers.10.input_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 22030336 | |
| }, | |
| { | |
| "name": "model.layers.10.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 22033408 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.c_attn.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 22036480 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.c_attn.weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 22040576 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 28332032 | |
| }, | |
| { | |
| "name": "model.layers.11.input_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 33050624 | |
| }, | |
| { | |
| "name": "model.layers.11.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 33053696 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.c_attn.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 33056768 | |
| } | |
| ], | |
| "md5sum": "386225f7f70fddb6a9e43795ad3a3202" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27525120, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.down_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 8960 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27525120, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2f0ee2ee3560e76e2906ba68bed46ccd" | |
| }, | |
| { | |
| "dataPath": "params_shard_11.bin", | |
| "format": "raw-shard", | |
| "nbytes": 55050240, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 17920, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55050240, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "af10359a4a55734f11c85bd7abd23af3" | |
| }, | |
| { | |
| "dataPath": "params_shard_12.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27525120, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.down_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 8960 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27525120, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bd931a02a4417507ced3902718fd7d78" | |
| }, | |
| { | |
| "dataPath": "params_shard_13.bin", | |
| "format": "raw-shard", | |
| "nbytes": 55050240, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 17920, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55050240, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a886e033b34ef4492524060ada67c5f3" | |
| }, | |
| { | |
| "dataPath": "params_shard_14.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27525120, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.down_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 8960 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27525120, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fdf7135c7f050510e918a6da2eb747e4" | |
| }, | |
| { | |
| "dataPath": "params_shard_15.bin", | |
| "format": "raw-shard", | |
| "nbytes": 55050240, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 17920, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55050240, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "19b7ab0aedab2c98ef84d1bb74677cec" | |
| }, | |
| { | |
| "dataPath": "params_shard_16.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33060864, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.self_attn.c_attn.weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 6291456 | |
| }, | |
| { | |
| "name": "model.layers.12.input_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.12.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 11013120 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.c_attn.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11016192 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.c_attn.weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 11020288 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17311744 | |
| }, | |
| { | |
| "name": "model.layers.13.input_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 22030336 | |
| }, | |
| { | |
| "name": "model.layers.13.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 22033408 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.c_attn.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 22036480 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.c_attn.weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 22040576 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 28332032 | |
| }, | |
| { | |
| "name": "model.layers.14.input_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 33050624 | |
| }, | |
| { | |
| "name": "model.layers.14.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 33053696 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.c_attn.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 33056768 | |
| } | |
| ], | |
| "md5sum": "40123c949516b96ac14030b58f97c970" | |
| }, | |
| { | |
| "dataPath": "params_shard_17.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27525120, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.down_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 8960 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27525120, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ba52e42aa6e9bfddb33bc6be2cbbed6c" | |
| }, | |
| { | |
| "dataPath": "params_shard_18.bin", | |
| "format": "raw-shard", | |
| "nbytes": 55050240, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 17920, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55050240, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ef3a674229205ddd58f4e2a2df97af1b" | |
| }, | |
| { | |
| "dataPath": "params_shard_19.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27525120, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.down_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 8960 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27525120, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4f3105a964d9957e5b54bce793846d4d" | |
| }, | |
| { | |
| "dataPath": "params_shard_20.bin", | |
| "format": "raw-shard", | |
| "nbytes": 55050240, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 17920, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55050240, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5e1387ab5bc412368449752b6370e568" | |
| }, | |
| { | |
| "dataPath": "params_shard_21.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27525120, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.down_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 8960 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27525120, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cb2e93a9ceaf3233fbd74948a6c249d9" | |
| }, | |
| { | |
| "dataPath": "params_shard_22.bin", | |
| "format": "raw-shard", | |
| "nbytes": 55050240, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 17920, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55050240, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8a80c045d2edddb7ef0573f7c564c519" | |
| }, | |
| { | |
| "dataPath": "params_shard_23.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33060864, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.self_attn.c_attn.weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 6291456 | |
| }, | |
| { | |
| "name": "model.layers.15.input_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.15.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 11013120 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.c_attn.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11016192 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.c_attn.weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 11020288 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17311744 | |
| }, | |
| { | |
| "name": "model.layers.16.input_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 22030336 | |
| }, | |
| { | |
| "name": "model.layers.16.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 22033408 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.c_attn.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 22036480 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.c_attn.weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 22040576 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 28332032 | |
| }, | |
| { | |
| "name": "model.layers.17.input_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 33050624 | |
| }, | |
| { | |
| "name": "model.layers.17.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 33053696 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.c_attn.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 33056768 | |
| } | |
| ], | |
| "md5sum": "2d0661bc87c9fd3e7da9a0d13349380a" | |
| }, | |
| { | |
| "dataPath": "params_shard_24.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27525120, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.down_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 8960 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27525120, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e6d23b39b8cb567cf0a4a7e6714cd78d" | |
| }, | |
| { | |
| "dataPath": "params_shard_25.bin", | |
| "format": "raw-shard", | |
| "nbytes": 55050240, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 17920, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55050240, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b4df51c707ad996798fd88cca7d4f82d" | |
| }, | |
| { | |
| "dataPath": "params_shard_26.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27525120, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.down_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 8960 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27525120, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "00a26156008979c2af45a95ec82a3a0d" | |
| }, | |
| { | |
| "dataPath": "params_shard_27.bin", | |
| "format": "raw-shard", | |
| "nbytes": 55050240, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 17920, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55050240, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4d45780eb98ca483a06df9ab8adbe7e2" | |
| }, | |
| { | |
| "dataPath": "params_shard_28.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27525120, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.down_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 8960 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27525120, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "75221e62142c91edd2dc091cec30665f" | |
| }, | |
| { | |
| "dataPath": "params_shard_29.bin", | |
| "format": "raw-shard", | |
| "nbytes": 55050240, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 17920, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55050240, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0fbd4abacd6750357450eaac4c8cfba8" | |
| }, | |
| { | |
| "dataPath": "params_shard_30.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33060864, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.self_attn.c_attn.weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 6291456 | |
| }, | |
| { | |
| "name": "model.layers.18.input_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.18.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 11013120 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.c_attn.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11016192 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.c_attn.weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 11020288 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.o_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17311744 | |
| }, | |
| { | |
| "name": "model.layers.19.input_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 22030336 | |
| }, | |
| { | |
| "name": "model.layers.19.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 22033408 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.c_attn.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 22036480 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.c_attn.weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 22040576 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 28332032 | |
| }, | |
| { | |
| "name": "model.layers.2.input_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 33050624 | |
| }, | |
| { | |
| "name": "model.layers.2.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 33053696 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.c_attn.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 33056768 | |
| } | |
| ], | |
| "md5sum": "6fb6758ef41b318d5f5bc1c009f643ac" | |
| }, | |
| { | |
| "dataPath": "params_shard_31.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27525120, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.down_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 8960 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27525120, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "dd93ed00fe037887023cc5e040d40186" | |
| }, | |
| { | |
| "dataPath": "params_shard_32.bin", | |
| "format": "raw-shard", | |
| "nbytes": 55050240, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 17920, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55050240, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d36d64f0ffa4c128414487032a334a9f" | |
| }, | |
| { | |
| "dataPath": "params_shard_33.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27525120, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.down_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 8960 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27525120, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8ae17a585657e430228933f94477a4bd" | |
| }, | |
| { | |
| "dataPath": "params_shard_34.bin", | |
| "format": "raw-shard", | |
| "nbytes": 55050240, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 17920, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55050240, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b8788042f9ad0c7915afadbe04e31930" | |
| }, | |
| { | |
| "dataPath": "params_shard_35.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27525120, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.down_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 8960 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27525120, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "353c3896a5d202428c9620719a05bca6" | |
| }, | |
| { | |
| "dataPath": "params_shard_36.bin", | |
| "format": "raw-shard", | |
| "nbytes": 55050240, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 17920, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55050240, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c0f5cd315d9f707d055fc2505442cb4e" | |
| }, | |
| { | |
| "dataPath": "params_shard_37.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33060864, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.self_attn.c_attn.weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 6291456 | |
| }, | |
| { | |
| "name": "model.layers.20.input_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.20.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 11013120 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.c_attn.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11016192 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.c_attn.weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 11020288 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.o_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17311744 | |
| }, | |
| { | |
| "name": "model.layers.21.input_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 22030336 | |
| }, | |
| { | |
| "name": "model.layers.21.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 22033408 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.c_attn.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 22036480 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.c_attn.weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 22040576 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.o_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 28332032 | |
| }, | |
| { | |
| "name": "model.layers.22.input_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 33050624 | |
| }, | |
| { | |
| "name": "model.layers.22.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 33053696 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.c_attn.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 33056768 | |
| } | |
| ], | |
| "md5sum": "2a5ef0f2584bde186b674af63e6e7b75" | |
| }, | |
| { | |
| "dataPath": "params_shard_38.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27525120, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.down_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 8960 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27525120, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e0cc850be792ebbec970e7ff6f4856ef" | |
| }, | |
| { | |
| "dataPath": "params_shard_39.bin", | |
| "format": "raw-shard", | |
| "nbytes": 55050240, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 17920, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55050240, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "36341c148fa89452c08e764f461d6298" | |
| }, | |
| { | |
| "dataPath": "params_shard_40.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27525120, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.down_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 8960 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27525120, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "21773a84fcb6c6e742babfba3cdb8359" | |
| }, | |
| { | |
| "dataPath": "params_shard_41.bin", | |
| "format": "raw-shard", | |
| "nbytes": 55050240, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 17920, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55050240, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e671efec0402a6cee8977fc5c58e507c" | |
| }, | |
| { | |
| "dataPath": "params_shard_42.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27525120, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.down_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 8960 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27525120, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5bb0260d69401b96e8c6bdebdfca36be" | |
| }, | |
| { | |
| "dataPath": "params_shard_43.bin", | |
| "format": "raw-shard", | |
| "nbytes": 55050240, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 17920, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55050240, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "59128784d429bdd191282b7d6442addc" | |
| }, | |
| { | |
| "dataPath": "params_shard_44.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33060864, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.self_attn.c_attn.weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.o_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 6291456 | |
| }, | |
| { | |
| "name": "model.layers.23.input_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.23.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 11013120 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.c_attn.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11016192 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.c_attn.weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 11020288 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17311744 | |
| }, | |
| { | |
| "name": "model.layers.24.input_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 22030336 | |
| }, | |
| { | |
| "name": "model.layers.24.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 22033408 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.c_attn.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 22036480 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.c_attn.weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 22040576 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.o_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 28332032 | |
| }, | |
| { | |
| "name": "model.layers.25.input_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 33050624 | |
| }, | |
| { | |
| "name": "model.layers.25.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 33053696 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.c_attn.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 33056768 | |
| } | |
| ], | |
| "md5sum": "2e3ff83df81c41b602349fa77b1f52b4" | |
| }, | |
| { | |
| "dataPath": "params_shard_45.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27525120, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.down_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 8960 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27525120, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0d198691fa3d0f8a8f048203a35098c6" | |
| }, | |
| { | |
| "dataPath": "params_shard_46.bin", | |
| "format": "raw-shard", | |
| "nbytes": 55050240, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 17920, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55050240, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ae27a680d3d40b1fa8619c493bc41372" | |
| }, | |
| { | |
| "dataPath": "params_shard_47.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27525120, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.down_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 8960 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27525120, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a3da97ef93ff7b3a9f75fe3c61966a07" | |
| }, | |
| { | |
| "dataPath": "params_shard_48.bin", | |
| "format": "raw-shard", | |
| "nbytes": 55050240, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 17920, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55050240, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9dbf820a69c2940892a3ba78eaa3ac17" | |
| }, | |
| { | |
| "dataPath": "params_shard_49.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27525120, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.down_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 8960 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27525120, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "923a91f17052a3d0f91efd819f28a9e3" | |
| }, | |
| { | |
| "dataPath": "params_shard_50.bin", | |
| "format": "raw-shard", | |
| "nbytes": 55050240, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 17920, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55050240, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b2e04d25a01a6e878053e3929c326e5c" | |
| }, | |
| { | |
| "dataPath": "params_shard_51.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33060864, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.self_attn.c_attn.weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.o_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 6291456 | |
| }, | |
| { | |
| "name": "model.layers.26.input_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.26.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 11013120 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.c_attn.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11016192 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.c_attn.weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 11020288 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.o_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17311744 | |
| }, | |
| { | |
| "name": "model.layers.27.input_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 22030336 | |
| }, | |
| { | |
| "name": "model.layers.27.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 22033408 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.c_attn.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 22036480 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.c_attn.weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 22040576 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.o_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 28332032 | |
| }, | |
| { | |
| "name": "model.layers.3.input_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 33050624 | |
| }, | |
| { | |
| "name": "model.layers.3.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 33053696 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.c_attn.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 33056768 | |
| } | |
| ], | |
| "md5sum": "e3772e7e14ef6fa64fc1d690dc042bb6" | |
| }, | |
| { | |
| "dataPath": "params_shard_52.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27525120, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.down_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 8960 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27525120, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "889d5ae8be3b2a52a14a806a87d91a51" | |
| }, | |
| { | |
| "dataPath": "params_shard_53.bin", | |
| "format": "raw-shard", | |
| "nbytes": 55050240, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 17920, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55050240, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d9e5436000200181c6bb168b3a57895b" | |
| }, | |
| { | |
| "dataPath": "params_shard_54.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27525120, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.down_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 8960 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27525120, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d40f1fddbc729b64c88e261d6b556b48" | |
| }, | |
| { | |
| "dataPath": "params_shard_55.bin", | |
| "format": "raw-shard", | |
| "nbytes": 55050240, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 17920, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55050240, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3c664af5bf4dc2b72824b91dc57e191b" | |
| }, | |
| { | |
| "dataPath": "params_shard_56.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27525120, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.down_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 8960 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27525120, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5bc317302d4897884b55ec6f42427685" | |
| }, | |
| { | |
| "dataPath": "params_shard_57.bin", | |
| "format": "raw-shard", | |
| "nbytes": 55050240, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 17920, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55050240, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "add3b34b5d3ff066fc2f581f2a8e6d80" | |
| }, | |
| { | |
| "dataPath": "params_shard_58.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33060864, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.self_attn.c_attn.weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 6291456 | |
| }, | |
| { | |
| "name": "model.layers.4.input_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.4.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 11013120 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.c_attn.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11016192 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.c_attn.weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 11020288 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17311744 | |
| }, | |
| { | |
| "name": "model.layers.5.input_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 22030336 | |
| }, | |
| { | |
| "name": "model.layers.5.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 22033408 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.c_attn.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 22036480 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.c_attn.weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 22040576 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 28332032 | |
| }, | |
| { | |
| "name": "model.layers.6.input_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 33050624 | |
| }, | |
| { | |
| "name": "model.layers.6.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 33053696 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.c_attn.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 33056768 | |
| } | |
| ], | |
| "md5sum": "ee80628c2a2a049c273a1e9b0b1b7ddb" | |
| }, | |
| { | |
| "dataPath": "params_shard_59.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27525120, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.down_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 8960 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27525120, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "afd49cfb9ea9a046505f1c2b166d4e4a" | |
| }, | |
| { | |
| "dataPath": "params_shard_60.bin", | |
| "format": "raw-shard", | |
| "nbytes": 55050240, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 17920, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55050240, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9390f2ec375eb8c6156ec82543434e6b" | |
| }, | |
| { | |
| "dataPath": "params_shard_61.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27525120, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.down_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 8960 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27525120, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "eaf99bc611ac6669988c0cf21a32eba0" | |
| }, | |
| { | |
| "dataPath": "params_shard_62.bin", | |
| "format": "raw-shard", | |
| "nbytes": 55050240, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 17920, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55050240, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c910c61629f83e45715ca3cc84de034e" | |
| }, | |
| { | |
| "dataPath": "params_shard_63.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27525120, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.down_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 8960 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27525120, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8f0903d34e62052c91a180e686ffa258" | |
| }, | |
| { | |
| "dataPath": "params_shard_64.bin", | |
| "format": "raw-shard", | |
| "nbytes": 55050240, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 17920, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55050240, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "be500b3df22deade101f844744892dc1" | |
| }, | |
| { | |
| "dataPath": "params_shard_65.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33060864, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.self_attn.c_attn.weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 6291456 | |
| }, | |
| { | |
| "name": "model.layers.7.input_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.7.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 11013120 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.c_attn.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11016192 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.c_attn.weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 11020288 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17311744 | |
| }, | |
| { | |
| "name": "model.layers.8.input_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 22030336 | |
| }, | |
| { | |
| "name": "model.layers.8.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 22033408 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.c_attn.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 22036480 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.c_attn.weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 22040576 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 28332032 | |
| }, | |
| { | |
| "name": "model.layers.9.input_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 33050624 | |
| }, | |
| { | |
| "name": "model.layers.9.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 33053696 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.c_attn.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 33056768 | |
| } | |
| ], | |
| "md5sum": "8df7c8c8dc860c0e0a4670128a173b70" | |
| }, | |
| { | |
| "dataPath": "params_shard_66.bin", | |
| "format": "raw-shard", | |
| "nbytes": 11013120, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.self_attn.c_attn.weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 6291456 | |
| }, | |
| { | |
| "name": "model.norm.weight", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 11010048 | |
| } | |
| ], | |
| "md5sum": "db8b6ec54d311d54bf38489a6a162580" | |
| } | |
| ] | |
| } |