|
{ |
|
"metadata": { |
|
"ParamSize": 582, |
|
"ParamBytes": 7409305600.0, |
|
"BitsPerParam": 5.005486689697216 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 128716800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.embed_in.q_weight", |
|
"shape": [ |
|
50280, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 128716800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fef62d69e41be2421e13e12edfc8c380" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.0.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6f5b0c168111dd5485ab455aae5d3a84" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21076480, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.embed_in.q_scale", |
|
"shape": [ |
|
50280, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 16089600, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.0.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 16089600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.0.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 16099840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 16110080 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.0.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 16120320 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.0.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 16130560 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.0.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 21045760 |
|
} |
|
], |
|
"md5sum": "f1671dca19e40d3a208895e33c8036d7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.0.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f03f191b1991155e07f4194c68d783df" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.0.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2121acfe7ef75de0261adf9d9bdbbc95" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.1.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "94aa9de53a400bc29408d86ed2539a99" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.0.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.0.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.0.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.0.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.0.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.0.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.0.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.1.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.1.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.1.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.1.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.1.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "916115ad7ff8c55edeec248b19c83a2f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.1.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4297fc8ff494c39237bbbff704f736bb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.1.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "95c1b8940b0ccabe97bdcd42e319d877" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.2.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "90eef1fe8a11c927f70b91b1d998b7a6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.1.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.1.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.1.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.1.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.1.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.1.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.1.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.2.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.2.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.2.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.2.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.2.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "c07a653627b9137bda73983e567fed27" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.2.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cd62d6605d8ed851417533e4b14cb6eb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.2.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "719b4cdab16d959b990230c851a1013f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.3.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "10db233b0638864f43143b133babc7b1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.2.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.2.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.2.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.2.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.2.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.2.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.2.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.3.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.3.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.3.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.3.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.3.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "46df777db455afa13c2ad2e7dda8ac57" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.3.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e9958c1f2fb72b9f07d0ef07f48069ac" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.3.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d0dfc3369fcb1ccdfac1eb3a9a763e81" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.4.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6a5b12c5fba3639244b8338837263f17" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.3.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.3.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.3.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.3.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.3.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.3.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.3.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.4.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.4.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.4.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.4.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.4.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.4.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "2fd49c60256ae93efbc26b8e6b92e86d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.4.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3ee34161e49a1d31dd102f4a6c070d1a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.4.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8939c4440e9b71ebf009d783c84c34c0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.5.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "529d628284382cf545d77b8681608b69" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.4.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.4.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.4.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.4.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.4.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.4.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.4.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.5.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.5.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.5.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.5.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.5.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.5.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "0a56bf448699fd0922ad7bc1a92e757e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.5.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "765c93e6305bf9c97f2b34ac3ead2025" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.5.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "20bb4b715ca5b32a8ae17dc1b484c5c9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.6.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b60aa88727bc25f40a03338ffa632c25" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.5.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.5.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.5.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.5.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.5.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.5.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.5.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.6.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.6.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.6.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.6.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.6.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.6.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "938fe0077893b64d9dcb0555e8a72f00" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.6.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "09859d650478654a874b8ba93dc89b53" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.6.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8efef363b5c0bfa78f77476b15ac9850" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.7.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e61eb36d8c0def33c7e2d366714e9cc4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.6.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.6.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.6.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.6.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.6.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.6.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.6.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.7.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.7.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.7.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.7.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.7.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.7.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "68db304cf9c3acbefe82c1adb389febc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.7.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c4a0becf0ee4254b8b302c4ae4c8a6e1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.7.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "84beb4f191963476d5505e3afba3c440" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.8.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d0c08e22013edec5177ee157bb97ae17" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.7.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.7.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.7.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.7.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.7.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.7.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.7.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.8.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.8.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.8.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.8.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.8.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.8.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "b1f54e0e4efbad4542db29561bc8d2c7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.8.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fc973c8c805c6a4f0ca776b378981fea" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.8.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "07897b43c6604a9e662f7bdad49a21e9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.9.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0e4e299b8d07ebbf18efd36014e0c435" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.8.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.8.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.8.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.8.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.8.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.8.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.8.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.9.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.9.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.9.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.9.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.9.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.9.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "a49c8fb954446fd5937f98d6969a87ec" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.9.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c0daa47a51bbf2b170b10211cfc2667f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.9.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4384f132527a76cd364cfd57f1e1103c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.10.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e1bd0b25c4ca5560437a41a2a72b38db" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.9.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.9.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.9.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.9.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.9.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.9.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.9.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.10.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.10.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.10.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.10.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.10.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.10.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "14e7a87f13022dc3e1a4719efe71a4ab" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.10.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "989f511b50da70f47433167f4bf29302" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.10.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a938162be5e18f2b8dbe139330bd5916" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.11.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b51defe248345f2b9125107612a3edad" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.10.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.10.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.10.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.10.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.10.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.10.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.10.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.11.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.11.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.11.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.11.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.11.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.11.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "0277b4ceabd0871d7ceb470dec3e9a05" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.11.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aeb7c6228094fae4ec42081a21278ed4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.11.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "90891ef6183fab7db354688165fde1c0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.12.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3b67e28552026e9f01022f110eb7bc01" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.11.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.11.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.11.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.11.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.11.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.11.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.11.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.12.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.12.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.12.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.12.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.12.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.12.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "945f0aa99e97c105d9c33c4ef6241444" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.12.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e0f6e811dd4de501638c53c6b09dc7ac" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.12.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "011c05e569ff80154da894aae53754cd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.13.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "030ec1307746164e9d6a83c60821da33" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.12.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.12.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.12.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.12.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.12.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.12.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.12.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.13.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.13.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.13.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.13.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.13.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.13.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "2f35f668a4c8a25aa3ff0bcdfc153abb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.13.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "44006ecfbcf1507bae9996783d975f1b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.13.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9f2e946729d8304df587793934e68a9b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.14.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "98ed1e73b040e88b768a2d3f1e50e0a4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.13.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.13.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.13.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.13.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.13.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.13.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.13.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.14.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.14.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.14.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.14.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.14.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.14.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "9a8392763c1ad8b9646f88647670deb1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.14.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "80c00fd475ed470f5283646fcf0de2fe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.14.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "de45e4f171e55058e7dc0cb5edd1251b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.15.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9975a7cadc023015d589e7b5926b214a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.14.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.14.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.14.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.14.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.14.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.14.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.14.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.15.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.15.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.15.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.15.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.15.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.15.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "b6c6a2b259c15c0f2c6d0334b7149ce5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.15.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1f653b481edc4b303085dc92bfce8c1a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.15.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d1b0381e6a73262904aeb7c4d9830e22" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.16.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5b1bcb3d8a97b25ea2e1027357290bac" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.15.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.15.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.15.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.15.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.15.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.15.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.15.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.16.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.16.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.16.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.16.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.16.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.16.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "594c8937fa49db8c4cf532b7687513e7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.16.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "58017a37da8e533d82c3525a500b2d82" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.16.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7392d0dae9766064f7bda4bc41b3c0e5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.17.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dfa6fa31439d3c2801ed0712648bbedd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.16.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.16.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.16.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.16.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.16.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.16.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.16.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.17.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.17.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.17.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.17.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.17.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.17.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "57bac99dcef0ad6d9144ef6b630c04a8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.17.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9b2abc7d90161b79f9ed5361a772e949" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.17.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9b27dafab7f24815820c5033d92484e3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.18.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "75a0a04664cd4ba84518e4d5061ab1f9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.17.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.17.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.17.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.17.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.17.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.17.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.17.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.18.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.18.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.18.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.18.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.18.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.18.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "03c87758fffe6e300abb9fd6e4c5b949" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.18.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "eef3eb131f3e6f7bb8a863b53c97e893" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.18.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "53effd03e7c5b487d9c0b5f5c169d8b0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.19.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d547ca99bc1024546e66ff6822665cf4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.18.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.18.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.18.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.18.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.18.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.18.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.18.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.19.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.19.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.19.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.19.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.19.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.19.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "bf3d650dff6affb36effda48e07ad265" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.19.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cd974d4bce57f511ccedd3697d358002" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.19.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f9e1e97ea77868658ca6ce1f1dd87f82" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.20.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cd7bc422872907148f0c665c6a27f6c7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_82.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.19.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.19.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.19.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.19.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.19.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.19.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.19.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.20.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.20.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.20.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.20.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.20.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.20.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "d44dc1855793089555dd8a13b476465a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_83.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.20.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "76713c776ed71f2304f012266e0b0806" |
|
}, |
|
{ |
|
"dataPath": "params_shard_84.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.20.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7c87051320a49de15b1c4716007670e1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_85.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.21.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9d99ee224dbdf2a288422f5c7049e831" |
|
}, |
|
{ |
|
"dataPath": "params_shard_86.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.20.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.20.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.20.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.20.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.20.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.20.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.20.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.21.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.21.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.21.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.21.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.21.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.21.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "af9ac041b33b5dd1a712ad66deb37b73" |
|
}, |
|
{ |
|
"dataPath": "params_shard_87.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.21.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8d006f21a2706007276ae747b7889e15" |
|
}, |
|
{ |
|
"dataPath": "params_shard_88.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.21.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b1ce5b5e8e857eacbac9ef4a7f368224" |
|
}, |
|
{ |
|
"dataPath": "params_shard_89.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.22.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4c2b369c4d408d8b934b9caea8ebeaef" |
|
}, |
|
{ |
|
"dataPath": "params_shard_90.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.21.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.21.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.21.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.21.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.21.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.21.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.21.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.22.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.22.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.22.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.22.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.22.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.22.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "aa5db9352fd4473458d291c0c9a815af" |
|
}, |
|
{ |
|
"dataPath": "params_shard_91.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.22.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "276933c3b0f61fd74367c811903e2c47" |
|
}, |
|
{ |
|
"dataPath": "params_shard_92.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.22.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3112bbd32345943f0efd20ee4b8594d6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_93.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.23.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7e61d654a9efed618d12dcc866914c41" |
|
}, |
|
{ |
|
"dataPath": "params_shard_94.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.22.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.22.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.22.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.22.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.22.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.22.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.22.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.23.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.23.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.23.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.23.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.23.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.23.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "f8e9bb2636adaae8f4dbe82a292e7e09" |
|
}, |
|
{ |
|
"dataPath": "params_shard_95.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.23.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "326e98b8b4c20b529d073ba7196d778e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_96.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.23.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "be47596a4d40d19cee92240aebed1485" |
|
}, |
|
{ |
|
"dataPath": "params_shard_97.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.24.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "67b057b68c0d9c3a71bb43d0c4a6cbe6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_98.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.23.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.23.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.23.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.23.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.23.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.23.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.23.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.24.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.24.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.24.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.24.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.24.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.24.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "6881bd568e4dcfaaaca11429e64337d1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_99.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.24.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f27a4a8e196349b54c0acc0af918bca0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_100.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.24.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a1868ee18eb9e29788f9ef20867602c2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_101.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.25.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c89e2bcee5c77d8cfcddaf65b8c4f07c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_102.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.24.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.24.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.24.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.24.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.24.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.24.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.24.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.25.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.25.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.25.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.25.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.25.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.25.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "26cf7834795c05706ecd7bf9453d6ad8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_103.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.25.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bc1672b7a535f6e0968937ac46783dac" |
|
}, |
|
{ |
|
"dataPath": "params_shard_104.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.25.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "46337d1db8ef719e1760133215efb697" |
|
}, |
|
{ |
|
"dataPath": "params_shard_105.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.26.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "26c03737fcbd378ca1070f86c667f33f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_106.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.25.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.25.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.25.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.25.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.25.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.25.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.25.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.26.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.26.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.26.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.26.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.26.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.26.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "0592187bb8840fc8f6a6bb9c2037e843" |
|
}, |
|
{ |
|
"dataPath": "params_shard_107.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.26.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6f85fb1b4c04c54f69729c490b8a27bd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_108.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.26.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bc02db0b153d93399633c2035c917f5d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_109.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.27.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "68fd99a88ac8b830013caa69cb43f162" |
|
}, |
|
{ |
|
"dataPath": "params_shard_110.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.26.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.26.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.26.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.26.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.26.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.26.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.26.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.27.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.27.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.27.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.27.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.27.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.27.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "0322195e7dd0fb8acc6b2065cc5b3a52" |
|
}, |
|
{ |
|
"dataPath": "params_shard_111.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.27.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aa4267e7b7df1157b2f243986d010ecb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_112.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.27.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1abd02a96e4db32e76d0917dff55a211" |
|
}, |
|
{ |
|
"dataPath": "params_shard_113.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.28.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4cb57488f81275872591db930242bd96" |
|
}, |
|
{ |
|
"dataPath": "params_shard_114.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.27.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.27.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.27.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.27.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.27.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.27.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.27.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.28.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.28.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.28.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.28.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.28.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.28.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "dfe8804514d91b94d58312ac625c1b78" |
|
}, |
|
{ |
|
"dataPath": "params_shard_115.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.28.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7c8cf1d50b6cd602228fcd9eae7c33ee" |
|
}, |
|
{ |
|
"dataPath": "params_shard_116.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.28.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e28954a20b845c58565a06db6b0a9b23" |
|
}, |
|
{ |
|
"dataPath": "params_shard_117.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.29.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3252b60251ccbaaa9e8ac35d7f6f8b87" |
|
}, |
|
{ |
|
"dataPath": "params_shard_118.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.28.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.28.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.28.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.28.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.28.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.28.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.28.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.29.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.29.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.29.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.29.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.29.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.29.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "00c170fc1cab0e17a7da470cc643721f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_119.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.29.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "015c56aed43dd2b956a7b24ce46f1502" |
|
}, |
|
{ |
|
"dataPath": "params_shard_120.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.29.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e59b08c0d0a95f1f62544ef20e483e0a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_121.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.30.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "043e6fea2a14c310691b630b0430a05e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_122.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.29.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.29.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.29.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.29.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.29.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.29.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.29.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.30.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.30.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.30.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.30.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.30.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.30.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "5efef3d804f28d3517bb8d27f1b8fd76" |
|
}, |
|
{ |
|
"dataPath": "params_shard_123.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.30.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f8c9f702dfbd7192afd36f0977e78945" |
|
}, |
|
{ |
|
"dataPath": "params_shard_124.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.30.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b72db9f72da8eaa3d8e406f0705b9601" |
|
}, |
|
{ |
|
"dataPath": "params_shard_125.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.31.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "96605655305fd1022175d7456382ac0f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_126.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.30.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.30.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.30.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.30.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.30.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.30.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.30.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.31.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.31.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.31.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.31.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.31.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.31.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "315c24b3450bee7df6448433304dd069" |
|
}, |
|
{ |
|
"dataPath": "params_shard_127.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.31.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9dda61f59747d22f19e464baa2cff6ac" |
|
}, |
|
{ |
|
"dataPath": "params_shard_128.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.31.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3a67ff6b5b0dcb71be4d920bd691de7d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_129.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.32.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a5a94d357e21a7be535f383c8c04d480" |
|
}, |
|
{ |
|
"dataPath": "params_shard_130.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.31.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.31.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.31.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.31.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.31.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.31.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.31.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.32.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.32.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.32.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.32.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.32.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.32.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "3256a7a1e76432f9700af365d0e041a9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_131.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.32.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "acecb2f2ba9ff46123cd817b019ad58c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_132.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.32.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "92f4d0476e0760d1abe03f578bd9d7b2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_133.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.33.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0b0857f5b3c78baf20ff137bd415eb0f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_134.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.32.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.32.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.32.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.32.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.32.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.32.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.32.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.33.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.33.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.33.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.33.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.33.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.33.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "be063081b18d1f76d5887f5c9b73b384" |
|
}, |
|
{ |
|
"dataPath": "params_shard_135.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.33.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b333e415c14550dc301803a928e906d6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_136.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.33.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b3d90311850d141a15daf05f7dcc151a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_137.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.34.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8beb451e22cf35cd27c260f0c35536f3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_138.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.33.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.33.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.33.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.33.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.33.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.33.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.33.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.34.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.34.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.34.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.34.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.34.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.34.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "56ca4ffb6dee6f1a71a3d7357c1d7db6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_139.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.34.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4e5cdb82b8b18f46b5d744db9bbb294e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_140.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.34.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "675958b7baddffbe31c64a528a13154b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_141.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.35.attention.query_key_value.q_weight", |
|
"shape": [ |
|
15360, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e7f6dcd71e347ebd30eae42303796bf9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_142.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32901120, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.34.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.34.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.34.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.34.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.34.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.34.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.34.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.35.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.35.input_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.35.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27934720 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.35.post_attention_layernorm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27944960 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.35.attention.query_key_value.q_scale", |
|
"shape": [ |
|
15360, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4915200, |
|
"byteOffset": 27955200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.35.attention.query_key_value.bias", |
|
"shape": [ |
|
15360 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 30720, |
|
"byteOffset": 32870400 |
|
} |
|
], |
|
"md5sum": "3c80e0a649a62b23989ace96f773230f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_143.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.35.mlp.dense_h_to_4h.q_weight", |
|
"shape": [ |
|
20480, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "73055c51111ab4599752621e31d8ebd8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_144.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.35.mlp.dense_4h_to_h.q_weight", |
|
"shape": [ |
|
5120, |
|
2560 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d434babb295a3dda9e50b15bfe66587a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_145.bin", |
|
"format": "raw-shard", |
|
"nbytes": 128716800, |
|
"records": [ |
|
{ |
|
"name": "embed_out.q_weight", |
|
"shape": [ |
|
50280, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 128716800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "48bfa7329be22cea3ab8d58841b47424" |
|
}, |
|
{ |
|
"dataPath": "params_shard_146.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27934720, |
|
"records": [ |
|
{ |
|
"name": "gpt_neox.layers.35.attention.dense.q_weight", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.35.attention.dense.q_scale", |
|
"shape": [ |
|
5120, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1638400, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.35.attention.dense.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 14745600 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.35.mlp.dense_h_to_4h.q_scale", |
|
"shape": [ |
|
20480, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 14755840 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.35.mlp.dense_h_to_4h.bias", |
|
"shape": [ |
|
20480 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 40960, |
|
"byteOffset": 21309440 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.35.mlp.dense_4h_to_h.q_scale", |
|
"shape": [ |
|
5120, |
|
640 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6553600, |
|
"byteOffset": 21350400 |
|
}, |
|
{ |
|
"name": "gpt_neox.layers.35.mlp.dense_4h_to_h.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27904000 |
|
}, |
|
{ |
|
"name": "gpt_neox.final_layer_norm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27914240 |
|
}, |
|
{ |
|
"name": "gpt_neox.final_layer_norm.bias", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 10240, |
|
"byteOffset": 27924480 |
|
} |
|
], |
|
"md5sum": "6a8baa6e41155d7a03b6829998a2cbf2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_147.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16089600, |
|
"records": [ |
|
{ |
|
"name": "embed_out.q_scale", |
|
"shape": [ |
|
50280, |
|
160 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 16089600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e1ff842137a8df9227bdbad4da1ce8c3" |
|
} |
|
] |
|
} |