{ "metadata": { "ParamSize": 582, "ParamBytes": 7409305600.0, "BitsPerParam": 5.005486689697216 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 128716800, "records": [ { "name": "gpt_neox.embed_in.q_weight", "shape": [ 50280, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 128716800, "byteOffset": 0 } ], "md5sum": "fef62d69e41be2421e13e12edfc8c380" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.0.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "6f5b0c168111dd5485ab455aae5d3a84" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 21076480, "records": [ { "name": "gpt_neox.embed_in.q_scale", "shape": [ 50280, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16089600, "byteOffset": 0 }, { "name": "gpt_neox.layers.0.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 16089600 }, { "name": "gpt_neox.layers.0.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 16099840 }, { "name": "gpt_neox.layers.0.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 16110080 }, { "name": "gpt_neox.layers.0.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 16120320 }, { "name": "gpt_neox.layers.0.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 16130560 }, { "name": "gpt_neox.layers.0.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 21045760 } ], "md5sum": "f1671dca19e40d3a208895e33c8036d7" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.0.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "f03f191b1991155e07f4194c68d783df" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.0.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "2121acfe7ef75de0261adf9d9bdbbc95" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.1.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "94aa9de53a400bc29408d86ed2539a99" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.0.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.0.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.0.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.0.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.0.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.0.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.0.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.1.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.1.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.1.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.1.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.1.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.1.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "916115ad7ff8c55edeec248b19c83a2f" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.1.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "4297fc8ff494c39237bbbff704f736bb" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.1.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "95c1b8940b0ccabe97bdcd42e319d877" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.2.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "90eef1fe8a11c927f70b91b1d998b7a6" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.1.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.1.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.1.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.1.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.1.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.1.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.1.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.2.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.2.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.2.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.2.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.2.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.2.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "c07a653627b9137bda73983e567fed27" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.2.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "cd62d6605d8ed851417533e4b14cb6eb" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.2.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "719b4cdab16d959b990230c851a1013f" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.3.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "10db233b0638864f43143b133babc7b1" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.2.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.2.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.2.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.2.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.2.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.2.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.2.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.3.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.3.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.3.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.3.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.3.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.3.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "46df777db455afa13c2ad2e7dda8ac57" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.3.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "e9958c1f2fb72b9f07d0ef07f48069ac" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.3.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "d0dfc3369fcb1ccdfac1eb3a9a763e81" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.4.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "6a5b12c5fba3639244b8338837263f17" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.3.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.3.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.3.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.3.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.3.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.3.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.3.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.4.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.4.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.4.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.4.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.4.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.4.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "2fd49c60256ae93efbc26b8e6b92e86d" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.4.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "3ee34161e49a1d31dd102f4a6c070d1a" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.4.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "8939c4440e9b71ebf009d783c84c34c0" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.5.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "529d628284382cf545d77b8681608b69" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.4.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.4.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.4.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.4.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.4.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.4.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.4.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.5.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.5.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.5.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.5.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.5.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.5.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "0a56bf448699fd0922ad7bc1a92e757e" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.5.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "765c93e6305bf9c97f2b34ac3ead2025" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.5.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "20bb4b715ca5b32a8ae17dc1b484c5c9" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.6.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "b60aa88727bc25f40a03338ffa632c25" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.5.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.5.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.5.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.5.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.5.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.5.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.5.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.6.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.6.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.6.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.6.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.6.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.6.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "938fe0077893b64d9dcb0555e8a72f00" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.6.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "09859d650478654a874b8ba93dc89b53" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.6.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "8efef363b5c0bfa78f77476b15ac9850" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.7.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "e61eb36d8c0def33c7e2d366714e9cc4" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.6.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.6.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.6.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.6.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.6.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.6.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.6.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.7.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.7.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.7.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.7.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.7.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.7.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "68db304cf9c3acbefe82c1adb389febc" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.7.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "c4a0becf0ee4254b8b302c4ae4c8a6e1" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.7.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "84beb4f191963476d5505e3afba3c440" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.8.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "d0c08e22013edec5177ee157bb97ae17" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.7.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.7.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.7.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.7.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.7.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.7.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.7.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.8.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.8.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.8.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.8.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.8.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.8.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "b1f54e0e4efbad4542db29561bc8d2c7" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.8.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "fc973c8c805c6a4f0ca776b378981fea" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.8.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "07897b43c6604a9e662f7bdad49a21e9" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.9.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "0e4e299b8d07ebbf18efd36014e0c435" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.8.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.8.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.8.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.8.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.8.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.8.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.8.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.9.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.9.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.9.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.9.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.9.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.9.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "a49c8fb954446fd5937f98d6969a87ec" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.9.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "c0daa47a51bbf2b170b10211cfc2667f" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.9.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "4384f132527a76cd364cfd57f1e1103c" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.10.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "e1bd0b25c4ca5560437a41a2a72b38db" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.9.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.9.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.9.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.9.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.9.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.9.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.9.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.10.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.10.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.10.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.10.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.10.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.10.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "14e7a87f13022dc3e1a4719efe71a4ab" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.10.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "989f511b50da70f47433167f4bf29302" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.10.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "a938162be5e18f2b8dbe139330bd5916" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.11.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "b51defe248345f2b9125107612a3edad" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.10.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.10.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.10.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.10.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.10.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.10.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.10.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.11.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.11.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.11.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.11.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.11.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.11.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "0277b4ceabd0871d7ceb470dec3e9a05" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.11.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "aeb7c6228094fae4ec42081a21278ed4" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.11.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "90891ef6183fab7db354688165fde1c0" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.12.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "3b67e28552026e9f01022f110eb7bc01" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.11.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.11.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.11.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.11.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.11.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.11.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.11.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.12.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.12.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.12.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.12.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.12.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.12.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "945f0aa99e97c105d9c33c4ef6241444" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.12.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "e0f6e811dd4de501638c53c6b09dc7ac" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.12.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "011c05e569ff80154da894aae53754cd" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.13.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "030ec1307746164e9d6a83c60821da33" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.12.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.12.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.12.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.12.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.12.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.12.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.12.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.13.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.13.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.13.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.13.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.13.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.13.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "2f35f668a4c8a25aa3ff0bcdfc153abb" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.13.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "44006ecfbcf1507bae9996783d975f1b" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.13.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "9f2e946729d8304df587793934e68a9b" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.14.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "98ed1e73b040e88b768a2d3f1e50e0a4" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.13.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.13.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.13.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.13.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.13.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.13.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.13.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.14.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.14.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.14.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.14.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.14.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.14.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "9a8392763c1ad8b9646f88647670deb1" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.14.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "80c00fd475ed470f5283646fcf0de2fe" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.14.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "de45e4f171e55058e7dc0cb5edd1251b" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.15.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "9975a7cadc023015d589e7b5926b214a" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.14.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.14.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.14.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.14.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.14.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.14.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.14.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.15.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.15.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.15.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.15.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.15.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.15.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "b6c6a2b259c15c0f2c6d0334b7149ce5" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.15.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "1f653b481edc4b303085dc92bfce8c1a" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.15.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "d1b0381e6a73262904aeb7c4d9830e22" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.16.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "5b1bcb3d8a97b25ea2e1027357290bac" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.15.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.15.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.15.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.15.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.15.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.15.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.15.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.16.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.16.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.16.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.16.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.16.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.16.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "594c8937fa49db8c4cf532b7687513e7" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.16.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "58017a37da8e533d82c3525a500b2d82" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.16.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "7392d0dae9766064f7bda4bc41b3c0e5" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.17.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "dfa6fa31439d3c2801ed0712648bbedd" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.16.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.16.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.16.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.16.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.16.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.16.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.16.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.17.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.17.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.17.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.17.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.17.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.17.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "57bac99dcef0ad6d9144ef6b630c04a8" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.17.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "9b2abc7d90161b79f9ed5361a772e949" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.17.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "9b27dafab7f24815820c5033d92484e3" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.18.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "75a0a04664cd4ba84518e4d5061ab1f9" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.17.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.17.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.17.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.17.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.17.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.17.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.17.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.18.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.18.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.18.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.18.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.18.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.18.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "03c87758fffe6e300abb9fd6e4c5b949" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.18.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "eef3eb131f3e6f7bb8a863b53c97e893" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.18.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "53effd03e7c5b487d9c0b5f5c169d8b0" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.19.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "d547ca99bc1024546e66ff6822665cf4" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.18.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.18.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.18.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.18.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.18.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.18.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.18.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.19.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.19.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.19.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.19.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.19.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.19.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "bf3d650dff6affb36effda48e07ad265" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.19.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "cd974d4bce57f511ccedd3697d358002" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.19.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "f9e1e97ea77868658ca6ce1f1dd87f82" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.20.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "cd7bc422872907148f0c665c6a27f6c7" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.19.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.19.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.19.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.19.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.19.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.19.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.19.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.20.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.20.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.20.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.20.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.20.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.20.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "d44dc1855793089555dd8a13b476465a" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.20.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "76713c776ed71f2304f012266e0b0806" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.20.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "7c87051320a49de15b1c4716007670e1" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.21.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "9d99ee224dbdf2a288422f5c7049e831" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.20.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.20.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.20.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.20.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.20.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.20.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.20.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.21.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.21.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.21.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.21.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.21.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.21.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "af9ac041b33b5dd1a712ad66deb37b73" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.21.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "8d006f21a2706007276ae747b7889e15" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.21.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "b1ce5b5e8e857eacbac9ef4a7f368224" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.22.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "4c2b369c4d408d8b934b9caea8ebeaef" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.21.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.21.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.21.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.21.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.21.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.21.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.21.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.22.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.22.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.22.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.22.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.22.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.22.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "aa5db9352fd4473458d291c0c9a815af" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.22.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "276933c3b0f61fd74367c811903e2c47" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.22.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "3112bbd32345943f0efd20ee4b8594d6" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.23.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "7e61d654a9efed618d12dcc866914c41" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.22.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.22.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.22.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.22.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.22.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.22.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.22.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.23.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.23.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.23.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.23.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.23.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.23.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "f8e9bb2636adaae8f4dbe82a292e7e09" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.23.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "326e98b8b4c20b529d073ba7196d778e" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.23.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "be47596a4d40d19cee92240aebed1485" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.24.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "67b057b68c0d9c3a71bb43d0c4a6cbe6" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.23.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.23.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.23.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.23.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.23.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.23.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.23.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.24.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.24.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.24.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.24.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.24.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.24.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "6881bd568e4dcfaaaca11429e64337d1" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.24.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "f27a4a8e196349b54c0acc0af918bca0" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.24.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "a1868ee18eb9e29788f9ef20867602c2" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.25.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "c89e2bcee5c77d8cfcddaf65b8c4f07c" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.24.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.24.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.24.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.24.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.24.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.24.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.24.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.25.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.25.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.25.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.25.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.25.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.25.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "26cf7834795c05706ecd7bf9453d6ad8" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.25.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "bc1672b7a535f6e0968937ac46783dac" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.25.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "46337d1db8ef719e1760133215efb697" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.26.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "26c03737fcbd378ca1070f86c667f33f" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.25.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.25.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.25.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.25.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.25.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.25.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.25.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.26.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.26.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.26.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.26.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.26.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.26.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "0592187bb8840fc8f6a6bb9c2037e843" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.26.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "6f85fb1b4c04c54f69729c490b8a27bd" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.26.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "bc02db0b153d93399633c2035c917f5d" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.27.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "68fd99a88ac8b830013caa69cb43f162" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.26.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.26.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.26.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.26.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.26.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.26.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.26.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.27.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.27.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.27.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.27.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.27.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.27.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "0322195e7dd0fb8acc6b2065cc5b3a52" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.27.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "aa4267e7b7df1157b2f243986d010ecb" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.27.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "1abd02a96e4db32e76d0917dff55a211" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.28.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "4cb57488f81275872591db930242bd96" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.27.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.27.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.27.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.27.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.27.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.27.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.27.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.28.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.28.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.28.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.28.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.28.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.28.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "dfe8804514d91b94d58312ac625c1b78" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.28.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "7c8cf1d50b6cd602228fcd9eae7c33ee" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.28.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "e28954a20b845c58565a06db6b0a9b23" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.29.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "3252b60251ccbaaa9e8ac35d7f6f8b87" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.28.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.28.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.28.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.28.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.28.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.28.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.28.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.29.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.29.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.29.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.29.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.29.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.29.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "00c170fc1cab0e17a7da470cc643721f" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.29.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "015c56aed43dd2b956a7b24ce46f1502" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.29.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "e59b08c0d0a95f1f62544ef20e483e0a" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.30.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "043e6fea2a14c310691b630b0430a05e" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.29.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.29.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.29.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.29.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.29.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.29.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.29.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.30.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.30.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.30.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.30.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.30.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.30.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "5efef3d804f28d3517bb8d27f1b8fd76" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.30.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "f8c9f702dfbd7192afd36f0977e78945" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.30.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "b72db9f72da8eaa3d8e406f0705b9601" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.31.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "96605655305fd1022175d7456382ac0f" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.30.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.30.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.30.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.30.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.30.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.30.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.30.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.31.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.31.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.31.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.31.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.31.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.31.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "315c24b3450bee7df6448433304dd069" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.31.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "9dda61f59747d22f19e464baa2cff6ac" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.31.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "3a67ff6b5b0dcb71be4d920bd691de7d" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.32.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "a5a94d357e21a7be535f383c8c04d480" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.31.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.31.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.31.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.31.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.31.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.31.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.31.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.32.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.32.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.32.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.32.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.32.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.32.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "3256a7a1e76432f9700af365d0e041a9" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.32.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "acecb2f2ba9ff46123cd817b019ad58c" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.32.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "92f4d0476e0760d1abe03f578bd9d7b2" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.33.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "0b0857f5b3c78baf20ff137bd415eb0f" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.32.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.32.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.32.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.32.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.32.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.32.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.32.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.33.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.33.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.33.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.33.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.33.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.33.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "be063081b18d1f76d5887f5c9b73b384" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.33.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "b333e415c14550dc301803a928e906d6" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.33.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "b3d90311850d141a15daf05f7dcc151a" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.34.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "8beb451e22cf35cd27c260f0c35536f3" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.33.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.33.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.33.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.33.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.33.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.33.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.33.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.34.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.34.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.34.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.34.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.34.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.34.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "56ca4ffb6dee6f1a71a3d7357c1d7db6" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.34.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "4e5cdb82b8b18f46b5d744db9bbb294e" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.34.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "675958b7baddffbe31c64a528a13154b" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "gpt_neox.layers.35.attention.query_key_value.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "e7f6dcd71e347ebd30eae42303796bf9" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 32901120, "records": [ { "name": "gpt_neox.layers.34.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.34.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.34.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.34.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.34.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.34.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.34.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.layers.35.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.layers.35.input_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 }, { "name": "gpt_neox.layers.35.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27934720 }, { "name": "gpt_neox.layers.35.post_attention_layernorm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27944960 }, { "name": "gpt_neox.layers.35.attention.query_key_value.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27955200 }, { "name": "gpt_neox.layers.35.attention.query_key_value.bias", "shape": [ 15360 ], "dtype": "bfloat16", "format": "raw", "nbytes": 30720, "byteOffset": 32870400 } ], "md5sum": "3c80e0a649a62b23989ace96f773230f" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.35.mlp.dense_h_to_4h.q_weight", "shape": [ 20480, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "73055c51111ab4599752621e31d8ebd8" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "gpt_neox.layers.35.mlp.dense_4h_to_h.q_weight", "shape": [ 5120, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "d434babb295a3dda9e50b15bfe66587a" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 128716800, "records": [ { "name": "embed_out.q_weight", "shape": [ 50280, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 128716800, "byteOffset": 0 } ], "md5sum": "48bfa7329be22cea3ab8d58841b47424" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 27934720, "records": [ { "name": "gpt_neox.layers.35.attention.dense.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "gpt_neox.layers.35.attention.dense.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "gpt_neox.layers.35.attention.dense.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "gpt_neox.layers.35.mlp.dense_h_to_4h.q_scale", "shape": [ 20480, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 14755840 }, { "name": "gpt_neox.layers.35.mlp.dense_h_to_4h.bias", "shape": [ 20480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 40960, "byteOffset": 21309440 }, { "name": "gpt_neox.layers.35.mlp.dense_4h_to_h.q_scale", "shape": [ 5120, 640 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6553600, "byteOffset": 21350400 }, { "name": "gpt_neox.layers.35.mlp.dense_4h_to_h.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27904000 }, { "name": "gpt_neox.final_layer_norm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27914240 }, { "name": "gpt_neox.final_layer_norm.bias", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27924480 } ], "md5sum": "6a8baa6e41155d7a03b6829998a2cbf2" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 16089600, "records": [ { "name": "embed_out.q_scale", "shape": [ 50280, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16089600, "byteOffset": 0 } ], "md5sum": "e1ff842137a8df9227bdbad4da1ce8c3" } ] }