dolly-v2-12b-q4f32_1-MLC / ndarray-cache-b16.json
junrushao's picture
Initial commit
e996e81
{
"metadata": {
"ParamSize": 582,
"ParamBytes": 7409305600.0,
"BitsPerParam": 5.005486689697216
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 128716800,
"records": [
{
"name": "gpt_neox.embed_in.q_weight",
"shape": [
50280,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 128716800,
"byteOffset": 0
}
],
"md5sum": "fef62d69e41be2421e13e12edfc8c380"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.0.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "6f5b0c168111dd5485ab455aae5d3a84"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 21076480,
"records": [
{
"name": "gpt_neox.embed_in.q_scale",
"shape": [
50280,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16089600,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.0.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 16089600
},
{
"name": "gpt_neox.layers.0.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 16099840
},
{
"name": "gpt_neox.layers.0.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 16110080
},
{
"name": "gpt_neox.layers.0.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 16120320
},
{
"name": "gpt_neox.layers.0.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 16130560
},
{
"name": "gpt_neox.layers.0.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 21045760
}
],
"md5sum": "f1671dca19e40d3a208895e33c8036d7"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.0.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "f03f191b1991155e07f4194c68d783df"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.0.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "2121acfe7ef75de0261adf9d9bdbbc95"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.1.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "94aa9de53a400bc29408d86ed2539a99"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.0.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.0.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.0.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.0.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.0.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.0.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.0.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.1.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.1.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.1.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.1.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.1.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.1.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "916115ad7ff8c55edeec248b19c83a2f"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.1.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "4297fc8ff494c39237bbbff704f736bb"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.1.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "95c1b8940b0ccabe97bdcd42e319d877"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.2.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "90eef1fe8a11c927f70b91b1d998b7a6"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.1.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.1.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.1.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.1.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.1.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.1.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.1.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.2.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.2.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.2.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.2.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.2.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.2.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "c07a653627b9137bda73983e567fed27"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.2.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "cd62d6605d8ed851417533e4b14cb6eb"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.2.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "719b4cdab16d959b990230c851a1013f"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.3.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "10db233b0638864f43143b133babc7b1"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.2.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.2.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.2.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.2.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.2.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.2.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.2.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.3.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.3.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.3.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.3.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.3.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.3.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "46df777db455afa13c2ad2e7dda8ac57"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.3.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "e9958c1f2fb72b9f07d0ef07f48069ac"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.3.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "d0dfc3369fcb1ccdfac1eb3a9a763e81"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.4.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "6a5b12c5fba3639244b8338837263f17"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.3.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.3.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.3.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.3.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.3.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.3.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.3.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.4.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.4.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.4.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.4.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.4.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.4.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "2fd49c60256ae93efbc26b8e6b92e86d"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.4.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "3ee34161e49a1d31dd102f4a6c070d1a"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.4.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "8939c4440e9b71ebf009d783c84c34c0"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.5.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "529d628284382cf545d77b8681608b69"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.4.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.4.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.4.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.4.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.4.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.4.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.4.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.5.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.5.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.5.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.5.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.5.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.5.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "0a56bf448699fd0922ad7bc1a92e757e"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.5.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "765c93e6305bf9c97f2b34ac3ead2025"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.5.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "20bb4b715ca5b32a8ae17dc1b484c5c9"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.6.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "b60aa88727bc25f40a03338ffa632c25"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.5.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.5.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.5.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.5.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.5.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.5.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.5.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.6.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.6.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.6.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.6.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.6.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.6.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "938fe0077893b64d9dcb0555e8a72f00"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.6.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "09859d650478654a874b8ba93dc89b53"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.6.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "8efef363b5c0bfa78f77476b15ac9850"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.7.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "e61eb36d8c0def33c7e2d366714e9cc4"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.6.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.6.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.6.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.6.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.6.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.6.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.6.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.7.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.7.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.7.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.7.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.7.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.7.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "68db304cf9c3acbefe82c1adb389febc"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.7.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "c4a0becf0ee4254b8b302c4ae4c8a6e1"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.7.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "84beb4f191963476d5505e3afba3c440"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.8.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "d0c08e22013edec5177ee157bb97ae17"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.7.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.7.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.7.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.7.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.7.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.7.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.7.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.8.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.8.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.8.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.8.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.8.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.8.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "b1f54e0e4efbad4542db29561bc8d2c7"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.8.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "fc973c8c805c6a4f0ca776b378981fea"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.8.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "07897b43c6604a9e662f7bdad49a21e9"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.9.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "0e4e299b8d07ebbf18efd36014e0c435"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.8.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.8.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.8.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.8.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.8.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.8.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.8.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.9.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.9.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.9.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.9.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.9.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.9.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "a49c8fb954446fd5937f98d6969a87ec"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.9.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "c0daa47a51bbf2b170b10211cfc2667f"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.9.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "4384f132527a76cd364cfd57f1e1103c"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.10.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "e1bd0b25c4ca5560437a41a2a72b38db"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.9.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.9.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.9.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.9.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.9.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.9.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.9.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.10.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.10.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.10.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.10.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.10.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.10.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "14e7a87f13022dc3e1a4719efe71a4ab"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.10.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "989f511b50da70f47433167f4bf29302"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.10.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "a938162be5e18f2b8dbe139330bd5916"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.11.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "b51defe248345f2b9125107612a3edad"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.10.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.10.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.10.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.10.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.10.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.10.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.10.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.11.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.11.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.11.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.11.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.11.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.11.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "0277b4ceabd0871d7ceb470dec3e9a05"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.11.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "aeb7c6228094fae4ec42081a21278ed4"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.11.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "90891ef6183fab7db354688165fde1c0"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.12.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "3b67e28552026e9f01022f110eb7bc01"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.11.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.11.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.11.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.11.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.11.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.11.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.11.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.12.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.12.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.12.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.12.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.12.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.12.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "945f0aa99e97c105d9c33c4ef6241444"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.12.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "e0f6e811dd4de501638c53c6b09dc7ac"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.12.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "011c05e569ff80154da894aae53754cd"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.13.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "030ec1307746164e9d6a83c60821da33"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.12.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.12.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.12.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.12.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.12.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.12.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.12.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.13.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.13.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.13.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.13.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.13.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.13.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "2f35f668a4c8a25aa3ff0bcdfc153abb"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.13.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "44006ecfbcf1507bae9996783d975f1b"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.13.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "9f2e946729d8304df587793934e68a9b"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.14.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "98ed1e73b040e88b768a2d3f1e50e0a4"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.13.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.13.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.13.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.13.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.13.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.13.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.13.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.14.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.14.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.14.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.14.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.14.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.14.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "9a8392763c1ad8b9646f88647670deb1"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.14.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "80c00fd475ed470f5283646fcf0de2fe"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.14.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "de45e4f171e55058e7dc0cb5edd1251b"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.15.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "9975a7cadc023015d589e7b5926b214a"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.14.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.14.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.14.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.14.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.14.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.14.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.14.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.15.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.15.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.15.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.15.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.15.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.15.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "b6c6a2b259c15c0f2c6d0334b7149ce5"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.15.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "1f653b481edc4b303085dc92bfce8c1a"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.15.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "d1b0381e6a73262904aeb7c4d9830e22"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.16.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "5b1bcb3d8a97b25ea2e1027357290bac"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.15.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.15.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.15.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.15.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.15.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.15.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.15.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.16.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.16.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.16.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.16.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.16.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.16.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "594c8937fa49db8c4cf532b7687513e7"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.16.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "58017a37da8e533d82c3525a500b2d82"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.16.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "7392d0dae9766064f7bda4bc41b3c0e5"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.17.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "dfa6fa31439d3c2801ed0712648bbedd"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.16.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.16.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.16.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.16.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.16.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.16.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.16.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.17.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.17.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.17.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.17.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.17.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.17.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "57bac99dcef0ad6d9144ef6b630c04a8"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.17.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "9b2abc7d90161b79f9ed5361a772e949"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.17.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "9b27dafab7f24815820c5033d92484e3"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.18.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "75a0a04664cd4ba84518e4d5061ab1f9"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.17.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.17.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.17.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.17.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.17.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.17.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.17.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.18.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.18.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.18.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.18.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.18.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.18.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "03c87758fffe6e300abb9fd6e4c5b949"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.18.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "eef3eb131f3e6f7bb8a863b53c97e893"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.18.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "53effd03e7c5b487d9c0b5f5c169d8b0"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.19.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "d547ca99bc1024546e66ff6822665cf4"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.18.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.18.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.18.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.18.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.18.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.18.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.18.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.19.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.19.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.19.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.19.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.19.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.19.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "bf3d650dff6affb36effda48e07ad265"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.19.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "cd974d4bce57f511ccedd3697d358002"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.19.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "f9e1e97ea77868658ca6ce1f1dd87f82"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.20.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "cd7bc422872907148f0c665c6a27f6c7"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.19.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.19.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.19.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.19.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.19.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.19.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.19.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.20.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.20.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.20.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.20.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.20.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.20.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "d44dc1855793089555dd8a13b476465a"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.20.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "76713c776ed71f2304f012266e0b0806"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.20.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "7c87051320a49de15b1c4716007670e1"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.21.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "9d99ee224dbdf2a288422f5c7049e831"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.20.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.20.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.20.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.20.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.20.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.20.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.20.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.21.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.21.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.21.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.21.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.21.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.21.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "af9ac041b33b5dd1a712ad66deb37b73"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.21.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "8d006f21a2706007276ae747b7889e15"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.21.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "b1ce5b5e8e857eacbac9ef4a7f368224"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.22.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "4c2b369c4d408d8b934b9caea8ebeaef"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.21.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.21.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.21.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.21.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.21.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.21.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.21.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.22.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.22.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.22.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.22.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.22.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.22.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "aa5db9352fd4473458d291c0c9a815af"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.22.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "276933c3b0f61fd74367c811903e2c47"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.22.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "3112bbd32345943f0efd20ee4b8594d6"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.23.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "7e61d654a9efed618d12dcc866914c41"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.22.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.22.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.22.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.22.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.22.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.22.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.22.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.23.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.23.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.23.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.23.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.23.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.23.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "f8e9bb2636adaae8f4dbe82a292e7e09"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.23.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "326e98b8b4c20b529d073ba7196d778e"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.23.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "be47596a4d40d19cee92240aebed1485"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.24.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "67b057b68c0d9c3a71bb43d0c4a6cbe6"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.23.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.23.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.23.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.23.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.23.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.23.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.23.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.24.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.24.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.24.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.24.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.24.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.24.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "6881bd568e4dcfaaaca11429e64337d1"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.24.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "f27a4a8e196349b54c0acc0af918bca0"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.24.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "a1868ee18eb9e29788f9ef20867602c2"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.25.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "c89e2bcee5c77d8cfcddaf65b8c4f07c"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.24.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.24.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.24.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.24.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.24.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.24.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.24.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.25.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.25.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.25.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.25.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.25.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.25.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "26cf7834795c05706ecd7bf9453d6ad8"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.25.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "bc1672b7a535f6e0968937ac46783dac"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.25.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "46337d1db8ef719e1760133215efb697"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.26.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "26c03737fcbd378ca1070f86c667f33f"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.25.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.25.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.25.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.25.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.25.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.25.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.25.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.26.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.26.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.26.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.26.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.26.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.26.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "0592187bb8840fc8f6a6bb9c2037e843"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.26.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "6f85fb1b4c04c54f69729c490b8a27bd"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.26.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "bc02db0b153d93399633c2035c917f5d"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.27.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "68fd99a88ac8b830013caa69cb43f162"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.26.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.26.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.26.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.26.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.26.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.26.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.26.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.27.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.27.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.27.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.27.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.27.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.27.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "0322195e7dd0fb8acc6b2065cc5b3a52"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.27.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "aa4267e7b7df1157b2f243986d010ecb"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.27.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "1abd02a96e4db32e76d0917dff55a211"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.28.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "4cb57488f81275872591db930242bd96"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.27.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.27.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.27.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.27.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.27.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.27.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.27.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.28.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.28.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.28.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.28.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.28.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.28.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "dfe8804514d91b94d58312ac625c1b78"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.28.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "7c8cf1d50b6cd602228fcd9eae7c33ee"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.28.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "e28954a20b845c58565a06db6b0a9b23"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.29.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "3252b60251ccbaaa9e8ac35d7f6f8b87"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.28.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.28.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.28.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.28.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.28.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.28.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.28.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.29.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.29.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.29.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.29.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.29.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.29.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "00c170fc1cab0e17a7da470cc643721f"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.29.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "015c56aed43dd2b956a7b24ce46f1502"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.29.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "e59b08c0d0a95f1f62544ef20e483e0a"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.30.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "043e6fea2a14c310691b630b0430a05e"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.29.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.29.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.29.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.29.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.29.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.29.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.29.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.30.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.30.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.30.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.30.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.30.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.30.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "5efef3d804f28d3517bb8d27f1b8fd76"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.30.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "f8c9f702dfbd7192afd36f0977e78945"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.30.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "b72db9f72da8eaa3d8e406f0705b9601"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.31.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "96605655305fd1022175d7456382ac0f"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.30.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.30.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.30.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.30.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.30.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.30.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.30.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.31.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.31.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.31.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.31.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.31.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.31.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "315c24b3450bee7df6448433304dd069"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.31.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "9dda61f59747d22f19e464baa2cff6ac"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.31.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "3a67ff6b5b0dcb71be4d920bd691de7d"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.32.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "a5a94d357e21a7be535f383c8c04d480"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.31.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.31.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.31.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.31.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.31.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.31.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.31.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.32.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.32.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.32.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.32.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.32.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.32.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "3256a7a1e76432f9700af365d0e041a9"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.32.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "acecb2f2ba9ff46123cd817b019ad58c"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.32.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "92f4d0476e0760d1abe03f578bd9d7b2"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.33.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "0b0857f5b3c78baf20ff137bd415eb0f"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.32.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.32.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.32.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.32.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.32.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.32.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.32.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.33.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.33.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.33.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.33.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.33.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.33.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "be063081b18d1f76d5887f5c9b73b384"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.33.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "b333e415c14550dc301803a928e906d6"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.33.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "b3d90311850d141a15daf05f7dcc151a"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.34.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "8beb451e22cf35cd27c260f0c35536f3"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.33.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.33.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.33.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.33.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.33.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.33.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.33.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.34.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.34.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.34.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.34.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.34.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.34.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "56ca4ffb6dee6f1a71a3d7357c1d7db6"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.34.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "4e5cdb82b8b18f46b5d744db9bbb294e"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.34.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "675958b7baddffbe31c64a528a13154b"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "gpt_neox.layers.35.attention.query_key_value.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "e7f6dcd71e347ebd30eae42303796bf9"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 32901120,
"records": [
{
"name": "gpt_neox.layers.34.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.34.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.34.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.34.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.34.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.34.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.34.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.layers.35.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.layers.35.input_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
},
{
"name": "gpt_neox.layers.35.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27934720
},
{
"name": "gpt_neox.layers.35.post_attention_layernorm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27944960
},
{
"name": "gpt_neox.layers.35.attention.query_key_value.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27955200
},
{
"name": "gpt_neox.layers.35.attention.query_key_value.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32870400
}
],
"md5sum": "3c80e0a649a62b23989ace96f773230f"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.35.mlp.dense_h_to_4h.q_weight",
"shape": [
20480,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "73055c51111ab4599752621e31d8ebd8"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "gpt_neox.layers.35.mlp.dense_4h_to_h.q_weight",
"shape": [
5120,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "d434babb295a3dda9e50b15bfe66587a"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 128716800,
"records": [
{
"name": "embed_out.q_weight",
"shape": [
50280,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 128716800,
"byteOffset": 0
}
],
"md5sum": "48bfa7329be22cea3ab8d58841b47424"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 27934720,
"records": [
{
"name": "gpt_neox.layers.35.attention.dense.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.35.attention.dense.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "gpt_neox.layers.35.attention.dense.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "gpt_neox.layers.35.mlp.dense_h_to_4h.q_scale",
"shape": [
20480,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 14755840
},
{
"name": "gpt_neox.layers.35.mlp.dense_h_to_4h.bias",
"shape": [
20480
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 40960,
"byteOffset": 21309440
},
{
"name": "gpt_neox.layers.35.mlp.dense_4h_to_h.q_scale",
"shape": [
5120,
640
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6553600,
"byteOffset": 21350400
},
{
"name": "gpt_neox.layers.35.mlp.dense_4h_to_h.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27904000
},
{
"name": "gpt_neox.final_layer_norm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27914240
},
{
"name": "gpt_neox.final_layer_norm.bias",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27924480
}
],
"md5sum": "6a8baa6e41155d7a03b6829998a2cbf2"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 16089600,
"records": [
{
"name": "embed_out.q_scale",
"shape": [
50280,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16089600,
"byteOffset": 0
}
],
"md5sum": "e1ff842137a8df9227bdbad4da1ce8c3"
}
]
}