numen-tech's picture
Add weights
396d73f
{
"metadata": {
"ParamSize": 325,
"ParamBytes": 3737657344.0,
"BitsPerParam": 3.034603510192578
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32768,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "b340b13895ce48d5aa5db389bee88719"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "4c89ff25d5c2671051b9b470fdb8c469"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 33316864,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
32768,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 0
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 2097152
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 2105344
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3940352
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3948544
},
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 3956736
}
],
"md5sum": "a73c0c7028ab34d13c95b53191412190"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "2d404ac381dd6655a59e0ea43f79ccd4"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "6c39e771ebf3a1b5e459573cff3bdca5"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "d32431d1dd41f7271eaddb2b42861734"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 27156480,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 917504
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 2752512
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 2760704
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 15343616
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 15736832
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 24125440
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 24395776
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 25313280
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 27148288
}
],
"md5sum": "a6c80eb81f1182bc09d90b2d47ef9351"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "bf54ce79ec33e09bd6eebf5f0d779ecc"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "66b976081b3708fbb01ab6aac936db50"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "528c520d50c32cb6f9d5fca170c5715e"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "bc73fb78ce7e386a30becc8fa649ee52"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "2f61ace76f66de36928750b443897ac4"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "990466e515bb00f2c9052bfd9ef7899e"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "1d9604343b7d38f320323273c5284e55"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "0f1eaf30b123fb34e1365c08a04d6e0f"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "7a00500cd34f9e063c5c258d6f308933"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "2ee96a87ff2656961f4843967bdf059f"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "d5267a50229efeb23ac07ee054a1daa3"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "5b2e509770b83d0454cb638bf58fc69d"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "4b37061535068d81c5f22cb685fc45c5"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "8b3d1c98f084d95f381d5f8e8e147128"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "9c4501253c586e0e68b8f5413026fdb6"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "7dc436f416d72315dce16671e63047fa"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "a5b61499cbb4c24cbb27ce083fde57a2"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "8e23fc9f508d0b22488a69fccbb1ccc7"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "56b456ecb40e79144212e49b0c694fb1"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "5eb9512cf5b5cb6ee52c21e3cff79fc9"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "674f23b4235c2054c9d09c309fbe8981"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "7fdc5e2ab800a71aec7919044b615294"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "e90c314cbd080ce89c9cff895d192cc1"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "817b42443031dc797557b14f7a363536"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32768,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "0eb5dbcf4218c8d480c971e3d523df99"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "58df9dc45bc31d4317053d7787330d70"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "653e6950236eb640042506778218a4e3"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 26501120,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
32768,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 21635072
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23732224
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 23740416
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24657920
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 26492928
}
],
"md5sum": "d6538f337dad790e82cc0b2245bd1274"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "0092a49c10ce40029ece748063384159"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "b4cda3d438bdf5a3a5c6f59f75a8a220"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "84c2dee67aa4513a5302712fc082d957"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "805fb62407e0349efc364de1d7dc1e4b"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 23461888,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21626880
}
],
"md5sum": "558f7738e68a625bbff83a8582a011b3"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "280b119d1a768b3269ca409c54c92f2e"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "cc7bca71e5bcc0e17a8ff4f5b8bae9fe"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "a5225b5e31d7e19674444113e3d0a305"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "10619a6ebfff2f11d7abf62fff2a727f"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "bd7b226a5411facf514f93c900505e56"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "f4b527f7184f30318cad4bec83ac4b15"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "f1190e2f81434f2cafca9fe2348c181a"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "aa7dd3a0a0b7d7244962ef72b6ff95bd"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "5eaf40750724964f61481e11658e6b96"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "fa21872c5124796edd7790d5438aa3d9"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "21b6a94084081fd8bc618e46db1128cc"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "451b931ce836a29139911bd534b9d10d"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "8e2e4aa4331069828a8c9be898d68089"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "79518a2d6e378e10b7bb111ddff8c173"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "d2e528f29b33455b07b3116e91c25d71"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "1e13b4880f04ffa699227d6283a838b0"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "1b65c565b61c321459c3c556922a52f9"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "96bb0df26d5d416970bdf5005b84aa5d"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "6f996fe7f76026530583309038844f6f"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "4bd65559bb826ea6454b6834e7f3c3b5"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "fa81eaf23496b572c6b8368270641980"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "72371465bddb95c36b064836f1daafb2"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "b513813708b9c3ed2613dc01589903dd"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "2a0cb064f36589b9b74775240d32f384"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "95117326e16f04b6d7c56fac064a61df"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "53d5ebc3fd860da0974968c96c052b4f"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "2a7cec890f3786cd0f0de9a22e7d2a40"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 25329664,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22552576
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22560768
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 22568960
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 23486464
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25321472
}
],
"md5sum": "0c70c3f8bad4110f4d9abda144563205"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "407be8f929a406fa14e946aa31bf399d"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "c943e91689f6afab9e7e58565cf73feb"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "8a9dd04a7b83dccd57eea95592b08c64"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "c456209d117c0e8732b76d171f54458b"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "3923b5a4f3a2e6b7b6f4dad85fe57d67"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "4a1ac1f70c9323c0213904a4cbbedc59"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "35c3d994df845c9cd8a3b13f159218f2"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "264fee745d6c079863609aad27d1f011"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "5d0c300b88b1de45677ebd6275cc12e7"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "ed5cd511352b4056fe0b11954ab3d384"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "bae3d2d20b46fae1a21890a43d8dd948"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "3b5d0b25f302ab04bcf306ee400a0c01"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "013a7b786ff0f6476f008a92e76260dc"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "481499f3ff835229744499c3b3a8e4fc"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "122986493b24c23172e7dbd9750373fb"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "f006ff72d170c02e50301db5b4daf7b1"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "89d102fef705a6124edea3eb06dc0a22"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "9bafe2315072aaa62465186dce1bf77d"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "b7c938ab5875601d3ac7550798a5f889"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "3da9a31855c6d0093d22250aabc19465"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "5b631dcf11f00f3909003151f79d4f4b"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "520b1902437cf074f9755a3916676a3d"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "7889c337f41d4b57ff81e2e95b110703"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "af373ef8898e8f5b88ffe4996e611e2b"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "7ea9620acbe93dd614dbbd2e36f682ee"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "690207ec15adaf5269902a5331732efa"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "07fe6f2e62c51952724c4c9378f330d6"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "7e58d50279f59bf4b91d94c94d05d06f"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21626880
}
],
"md5sum": "b524bcd44c40be1de6b1c38f8e0a1fb0"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 21626880,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
}
],
"md5sum": "7fa3490f64867212f99e6d74194d2cdf"
}
]
}