Qwen1.5-1.8B-q8f32_1-MLC / ndarray-cache.json
gatepoet's picture
Initial commit
e500eca verified
{
"metadata": {
"ParamSize": 269,
"ParamBytes": 2067144704.0,
"BitsPerParam": 9.003102948079416
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 311164928,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
151936,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 311164928,
"byteOffset": 0
}
],
"md5sum": "9fd7aeeb1bbd80df281032f8a6453436"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 311164928,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
151936,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 311164928,
"byteOffset": 0
}
],
"md5sum": "8d4916c17f623990150071b3c1087e8c"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 19447808,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
151936,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 19447808,
"byteOffset": 0
}
],
"md5sum": "73ad34503189367643327b0e5cd665a1"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
11008,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "8231111064935d193fafdb1d8097f7d0"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 32854016,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
151936,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 19447808,
"byteOffset": 0
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 19447808
},
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 19451904
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
2048,
172
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 704512,
"byteOffset": 30724096
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
11008,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 31428608
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 32837632
},
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 32841728
}
],
"md5sum": "d7b58d27d16518bf9dfd1bc16e6a97f6"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
11008,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "590b52e6c39bb5a9cb77779c0c954cae"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 31232000,
"records": [
{
"name": "model.layers.0.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.c_attn.q_scale",
"shape": [
6144,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 12582912
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
2048,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 13369344
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 17563648
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 17825792
},
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 17829888
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
2048,
172
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 704512,
"byteOffset": 29102080
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
11008,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 29806592
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31215616
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31219712
}
],
"md5sum": "c899cb4c4948e81649c3d81e64641931"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
11008,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "52d06a984d511c203e1ca464d38fefdf"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 31232000,
"records": [
{
"name": "model.layers.1.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.c_attn.q_scale",
"shape": [
6144,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 12582912
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
2048,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 13369344
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 17563648
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 17825792
},
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 17829888
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
2048,
172
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 704512,
"byteOffset": 29102080
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
11008,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 29806592
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31215616
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31219712
}
],
"md5sum": "86a5dab53488833f6734147d6f87184c"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
11008,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "7320f9a23e7c25f040de925004b1878b"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 31232000,
"records": [
{
"name": "model.layers.10.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.c_attn.q_scale",
"shape": [
6144,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 12582912
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
2048,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 13369344
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 17563648
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 17825792
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 17829888
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
2048,
172
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 704512,
"byteOffset": 29102080
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
11008,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 29806592
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31215616
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31219712
}
],
"md5sum": "8c4bf3a39137cf0f5af2ed467c05ef1b"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
11008,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "ecb4fa6268f906bb02e3882c121d876a"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 31232000,
"records": [
{
"name": "model.layers.11.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.c_attn.q_scale",
"shape": [
6144,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 12582912
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
2048,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 13369344
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 17563648
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 17825792
},
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 17829888
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
2048,
172
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 704512,
"byteOffset": 29102080
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
11008,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 29806592
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31215616
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31219712
}
],
"md5sum": "cf4536351c03db3982236e48a0379e39"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
11008,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "c4fa255b4189ef8672b0294d38e4a24f"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 31232000,
"records": [
{
"name": "model.layers.12.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.c_attn.q_scale",
"shape": [
6144,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 12582912
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
2048,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 13369344
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 17563648
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 17825792
},
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 17829888
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
2048,
172
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 704512,
"byteOffset": 29102080
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
11008,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 29806592
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31215616
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31219712
}
],
"md5sum": "a0ebe921dbb1ac894b7b48d51055043f"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
11008,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "376daf4db4a00f9088582f22e063291b"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 31232000,
"records": [
{
"name": "model.layers.13.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.c_attn.q_scale",
"shape": [
6144,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 12582912
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
2048,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 13369344
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 17563648
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 17825792
},
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 17829888
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
2048,
172
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 704512,
"byteOffset": 29102080
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
11008,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 29806592
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31215616
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31219712
}
],
"md5sum": "5f4ab6c00994dae3f5c863d4a27de06e"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
11008,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "794b77cc14531b07b5de1daf330d81a7"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 31232000,
"records": [
{
"name": "model.layers.14.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.c_attn.q_scale",
"shape": [
6144,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 12582912
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
2048,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 13369344
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 17563648
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 17825792
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 17829888
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
2048,
172
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 704512,
"byteOffset": 29102080
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
11008,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 29806592
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31215616
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31219712
}
],
"md5sum": "b02dc443de5cacfd87ac728f71a5d113"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
11008,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "2ee1fb7989a2b523cd878fdd605211f3"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 31232000,
"records": [
{
"name": "model.layers.15.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.c_attn.q_scale",
"shape": [
6144,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 12582912
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
2048,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 13369344
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 17563648
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 17825792
},
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 17829888
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
2048,
172
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 704512,
"byteOffset": 29102080
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
11008,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 29806592
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31215616
},
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31219712
}
],
"md5sum": "998c9c82ae278c936fa9b98172d10edb"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
11008,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "de13765b1e31342906e2f4e0b02c81f0"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 31232000,
"records": [
{
"name": "model.layers.16.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.c_attn.q_scale",
"shape": [
6144,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 12582912
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
2048,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 13369344
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 17563648
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 17825792
},
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 17829888
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
2048,
172
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 704512,
"byteOffset": 29102080
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
11008,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 29806592
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31215616
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31219712
}
],
"md5sum": "be8cc00a2a061877c46d8d33b15007c7"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
11008,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "4b4f17eb683c36c53a71a24dc342bc36"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 31232000,
"records": [
{
"name": "model.layers.17.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.c_attn.q_scale",
"shape": [
6144,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 12582912
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
2048,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 13369344
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 17563648
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 17825792
},
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 17829888
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
2048,
172
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 704512,
"byteOffset": 29102080
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
11008,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 29806592
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31215616
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31219712
}
],
"md5sum": "bfe84819102f516ecefe13317b7a868b"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
11008,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "1c8f3e7b681d47c5afcfbfa492a82b0d"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 31232000,
"records": [
{
"name": "model.layers.18.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.c_attn.q_scale",
"shape": [
6144,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 12582912
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
2048,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 13369344
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 17563648
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 17825792
},
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 17829888
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
2048,
172
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 704512,
"byteOffset": 29102080
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
11008,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 29806592
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31215616
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31219712
}
],
"md5sum": "fbb336c1e963fb8224ad0cce6f6c9eb8"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
11008,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "a3a2e17521e9939915f3585e13d816ad"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 31232000,
"records": [
{
"name": "model.layers.19.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.c_attn.q_scale",
"shape": [
6144,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 12582912
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
2048,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 13369344
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 17563648
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 17825792
},
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 17829888
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
2048,
172
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 704512,
"byteOffset": 29102080
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
11008,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 29806592
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31215616
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31219712
}
],
"md5sum": "63d9b83b4cdcdbfe4e8423cf3a052591"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
11008,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "885875e67aba433587f42bce09f5e7d9"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 31232000,
"records": [
{
"name": "model.layers.2.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.c_attn.q_scale",
"shape": [
6144,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 12582912
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
2048,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 13369344
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 17563648
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 17825792
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 17829888
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
2048,
172
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 704512,
"byteOffset": 29102080
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
11008,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 29806592
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31215616
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31219712
}
],
"md5sum": "fb540774e15a07e7ff1e737b6e1e6ece"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
11008,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "cffe15773593df15c2329d29d3634f04"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 31232000,
"records": [
{
"name": "model.layers.20.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.c_attn.q_scale",
"shape": [
6144,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 12582912
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
2048,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 13369344
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 17563648
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 17825792
},
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 17829888
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
2048,
172
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 704512,
"byteOffset": 29102080
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
11008,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 29806592
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31215616
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31219712
}
],
"md5sum": "c653d849393b27e72ccae83159f17bf0"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
11008,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "3cc58cd5ed682131541d522c2a66309c"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 31232000,
"records": [
{
"name": "model.layers.21.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.c_attn.q_scale",
"shape": [
6144,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 12582912
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
2048,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 13369344
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 17563648
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 17825792
},
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 17829888
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
2048,
172
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 704512,
"byteOffset": 29102080
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
11008,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 29806592
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31215616
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31219712
}
],
"md5sum": "9bbbc6b06d53f73b61d6d8adfd9ec63e"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
11008,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "a7c8bb315b8ced1aee9ee37a6fca13c5"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 31232000,
"records": [
{
"name": "model.layers.22.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.c_attn.q_scale",
"shape": [
6144,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 12582912
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
2048,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 13369344
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 17563648
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 17825792
},
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 17829888
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
2048,
172
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 704512,
"byteOffset": 29102080
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
11008,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 29806592
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31215616
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31219712
}
],
"md5sum": "e1d6b5dd3b7f741ce57bf49a41b91ff7"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
11008,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "2e7a8b1fd8c4d904377bd56476e9284a"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 31232000,
"records": [
{
"name": "model.layers.23.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.c_attn.q_scale",
"shape": [
6144,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 12582912
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
2048,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 13369344
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 17563648
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 17825792
},
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 17829888
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
2048,
172
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 704512,
"byteOffset": 29102080
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
11008,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 29806592
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31215616
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31219712
}
],
"md5sum": "d7ae246c1a4994e5c651f1f9a7c8bd89"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
11008,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "523a8636cbbef697fdb075efe0bd2450"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 31232000,
"records": [
{
"name": "model.layers.3.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.c_attn.q_scale",
"shape": [
6144,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 12582912
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
2048,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 13369344
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 17563648
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 17825792
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 17829888
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
2048,
172
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 704512,
"byteOffset": 29102080
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
11008,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 29806592
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31215616
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31219712
}
],
"md5sum": "d27b8ca902a958a7a40b04c65e04b6f2"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
11008,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "93f4f1035b6c28ad578ee076b4c8aa96"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 31232000,
"records": [
{
"name": "model.layers.4.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.c_attn.q_scale",
"shape": [
6144,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 12582912
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
2048,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 13369344
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 17563648
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 17825792
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 17829888
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
2048,
172
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 704512,
"byteOffset": 29102080
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
11008,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 29806592
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31215616
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31219712
}
],
"md5sum": "6d734f64c8eb4ac237e3a9484ffac89d"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
11008,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "84b2eb0f2d5376b493c6979fc1372e08"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 31232000,
"records": [
{
"name": "model.layers.5.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.c_attn.q_scale",
"shape": [
6144,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 12582912
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
2048,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 13369344
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 17563648
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 17825792
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 17829888
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
2048,
172
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 704512,
"byteOffset": 29102080
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
11008,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 29806592
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31215616
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31219712
}
],
"md5sum": "bd91fa4a4cda4c7cccdd3146131872b0"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
11008,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "339cc4ec4f72d11502b4797f7f70cb6c"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 31232000,
"records": [
{
"name": "model.layers.6.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.c_attn.q_scale",
"shape": [
6144,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 12582912
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
2048,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 13369344
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 17563648
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 17825792
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 17829888
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
2048,
172
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 704512,
"byteOffset": 29102080
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
11008,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 29806592
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31215616
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31219712
}
],
"md5sum": "07b4af925b95f11fdd782425d19382c7"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
11008,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "6a761272ae46d4daee09d5bf115ed0dd"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 31232000,
"records": [
{
"name": "model.layers.7.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.c_attn.q_scale",
"shape": [
6144,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 12582912
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
2048,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 13369344
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 17563648
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 17825792
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 17829888
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
2048,
172
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 704512,
"byteOffset": 29102080
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
11008,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 29806592
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31215616
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31219712
}
],
"md5sum": "0515ebda56dc78c731b0aa1399845bab"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
11008,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "f3445c55433689e2b2f12b91b703302c"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 31232000,
"records": [
{
"name": "model.layers.8.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.c_attn.q_scale",
"shape": [
6144,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 12582912
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
2048,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 13369344
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 17563648
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 17825792
},
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 17829888
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
2048,
172
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 704512,
"byteOffset": 29102080
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
11008,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 29806592
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31215616
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31219712
}
],
"md5sum": "8e2d34afc5c6986b2033426b47594619"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 17829888,
"records": [
{
"name": "model.layers.9.self_attn.c_attn.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.c_attn.q_scale",
"shape": [
6144,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 12582912
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
2048,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 13369344
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 17563648
},
{
"name": "model.norm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 17825792
}
],
"md5sum": "91964c6c5e5a4e7ac1c2ba0237933f7a"
}
]
}