| { | |
| "metadata": { | |
| "ParamSize": 343, | |
| "ParamBytes": 798633984.0, | |
| "BitsPerParam": 4.504832386302102 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 52428800, | |
| "records": [ | |
| { | |
| "name": "transformer.embd.q_weight", | |
| "shape": [ | |
| 51200, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 52428800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bbcff0fa08b86d3c207d0067baf2422e" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25468928, | |
| "records": [ | |
| { | |
| "name": "transformer.embd.q_scale", | |
| "shape": [ | |
| 51200, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6553600, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.0.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 6553600 | |
| }, | |
| { | |
| "name": "transformer.h.0.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 6557696 | |
| }, | |
| { | |
| "name": "transformer.h.0.mixer.Wqkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 6561792 | |
| }, | |
| { | |
| "name": "transformer.h.0.mixer.Wqkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 12853248 | |
| }, | |
| { | |
| "name": "transformer.h.0.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 13639680 | |
| }, | |
| { | |
| "name": "transformer.h.0.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 13651968 | |
| }, | |
| { | |
| "name": "transformer.h.0.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 15749120 | |
| }, | |
| { | |
| "name": "transformer.h.0.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16011264 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.fc1.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 16015360 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.fc1.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 24403968 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 25452544 | |
| } | |
| ], | |
| "md5sum": "21200155d3cfbd8b1ef46a254979035d" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28356608, | |
| "records": [ | |
| { | |
| "name": "transformer.h.0.mlp.fc2.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.fc2.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.1.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9441280 | |
| }, | |
| { | |
| "name": "transformer.h.1.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "transformer.h.1.mixer.Wqkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "transformer.h.1.mixer.Wqkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 15740928 | |
| }, | |
| { | |
| "name": "transformer.h.1.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16527360 | |
| }, | |
| { | |
| "name": "transformer.h.1.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 16539648 | |
| }, | |
| { | |
| "name": "transformer.h.1.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 18636800 | |
| }, | |
| { | |
| "name": "transformer.h.1.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.fc1.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 18903040 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.fc1.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 27291648 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 28340224 | |
| } | |
| ], | |
| "md5sum": "73063ed6cc50fac6cb5805d281e2e2c1" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28356608, | |
| "records": [ | |
| { | |
| "name": "transformer.h.1.mlp.fc2.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.fc2.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.2.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9441280 | |
| }, | |
| { | |
| "name": "transformer.h.2.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "transformer.h.2.mixer.Wqkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "transformer.h.2.mixer.Wqkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 15740928 | |
| }, | |
| { | |
| "name": "transformer.h.2.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16527360 | |
| }, | |
| { | |
| "name": "transformer.h.2.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 16539648 | |
| }, | |
| { | |
| "name": "transformer.h.2.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 18636800 | |
| }, | |
| { | |
| "name": "transformer.h.2.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.fc1.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 18903040 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.fc1.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 27291648 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 28340224 | |
| } | |
| ], | |
| "md5sum": "e0b6e0e09d5711ce97be27390531eb9d" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28356608, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.mlp.fc2.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.fc2.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.3.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9441280 | |
| }, | |
| { | |
| "name": "transformer.h.3.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "transformer.h.3.mixer.Wqkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "transformer.h.3.mixer.Wqkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 15740928 | |
| }, | |
| { | |
| "name": "transformer.h.3.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16527360 | |
| }, | |
| { | |
| "name": "transformer.h.3.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 16539648 | |
| }, | |
| { | |
| "name": "transformer.h.3.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 18636800 | |
| }, | |
| { | |
| "name": "transformer.h.3.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.fc1.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 18903040 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.fc1.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 27291648 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 28340224 | |
| } | |
| ], | |
| "md5sum": "0b30370e6efe5205bb84ad315d66dd89" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28356608, | |
| "records": [ | |
| { | |
| "name": "transformer.h.3.mlp.fc2.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.fc2.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.4.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9441280 | |
| }, | |
| { | |
| "name": "transformer.h.4.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "transformer.h.4.mixer.Wqkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "transformer.h.4.mixer.Wqkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 15740928 | |
| }, | |
| { | |
| "name": "transformer.h.4.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16527360 | |
| }, | |
| { | |
| "name": "transformer.h.4.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 16539648 | |
| }, | |
| { | |
| "name": "transformer.h.4.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 18636800 | |
| }, | |
| { | |
| "name": "transformer.h.4.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.fc1.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 18903040 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.fc1.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 27291648 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 28340224 | |
| } | |
| ], | |
| "md5sum": "bcd0448dd1f620fb21898d553f124b4e" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28356608, | |
| "records": [ | |
| { | |
| "name": "transformer.h.4.mlp.fc2.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.fc2.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.5.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9441280 | |
| }, | |
| { | |
| "name": "transformer.h.5.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "transformer.h.5.mixer.Wqkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "transformer.h.5.mixer.Wqkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 15740928 | |
| }, | |
| { | |
| "name": "transformer.h.5.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16527360 | |
| }, | |
| { | |
| "name": "transformer.h.5.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 16539648 | |
| }, | |
| { | |
| "name": "transformer.h.5.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 18636800 | |
| }, | |
| { | |
| "name": "transformer.h.5.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.fc1.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 18903040 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.fc1.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 27291648 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 28340224 | |
| } | |
| ], | |
| "md5sum": "40831558a9eb61985fb59217dccf418d" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28356608, | |
| "records": [ | |
| { | |
| "name": "transformer.h.5.mlp.fc2.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.fc2.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.6.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9441280 | |
| }, | |
| { | |
| "name": "transformer.h.6.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "transformer.h.6.mixer.Wqkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "transformer.h.6.mixer.Wqkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 15740928 | |
| }, | |
| { | |
| "name": "transformer.h.6.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16527360 | |
| }, | |
| { | |
| "name": "transformer.h.6.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 16539648 | |
| }, | |
| { | |
| "name": "transformer.h.6.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 18636800 | |
| }, | |
| { | |
| "name": "transformer.h.6.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.fc1.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 18903040 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.fc1.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 27291648 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 28340224 | |
| } | |
| ], | |
| "md5sum": "2bce86dae74f79860f155c52bcb9f361" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28356608, | |
| "records": [ | |
| { | |
| "name": "transformer.h.6.mlp.fc2.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.fc2.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.7.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9441280 | |
| }, | |
| { | |
| "name": "transformer.h.7.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "transformer.h.7.mixer.Wqkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "transformer.h.7.mixer.Wqkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 15740928 | |
| }, | |
| { | |
| "name": "transformer.h.7.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16527360 | |
| }, | |
| { | |
| "name": "transformer.h.7.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 16539648 | |
| }, | |
| { | |
| "name": "transformer.h.7.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 18636800 | |
| }, | |
| { | |
| "name": "transformer.h.7.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.fc1.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 18903040 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.fc1.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 27291648 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 28340224 | |
| } | |
| ], | |
| "md5sum": "3b93cb72b930984c98bd02aaeabc6e73" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28356608, | |
| "records": [ | |
| { | |
| "name": "transformer.h.7.mlp.fc2.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.fc2.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.8.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9441280 | |
| }, | |
| { | |
| "name": "transformer.h.8.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "transformer.h.8.mixer.Wqkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "transformer.h.8.mixer.Wqkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 15740928 | |
| }, | |
| { | |
| "name": "transformer.h.8.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16527360 | |
| }, | |
| { | |
| "name": "transformer.h.8.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 16539648 | |
| }, | |
| { | |
| "name": "transformer.h.8.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 18636800 | |
| }, | |
| { | |
| "name": "transformer.h.8.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.fc1.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 18903040 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.fc1.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 27291648 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 28340224 | |
| } | |
| ], | |
| "md5sum": "8c06bea27b95b8031c576d9c8df2cec0" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28356608, | |
| "records": [ | |
| { | |
| "name": "transformer.h.8.mlp.fc2.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.fc2.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.9.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9441280 | |
| }, | |
| { | |
| "name": "transformer.h.9.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "transformer.h.9.mixer.Wqkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "transformer.h.9.mixer.Wqkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 15740928 | |
| }, | |
| { | |
| "name": "transformer.h.9.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16527360 | |
| }, | |
| { | |
| "name": "transformer.h.9.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 16539648 | |
| }, | |
| { | |
| "name": "transformer.h.9.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 18636800 | |
| }, | |
| { | |
| "name": "transformer.h.9.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.fc1.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 18903040 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.fc1.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 27291648 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 28340224 | |
| } | |
| ], | |
| "md5sum": "161c60ecd4fc0b0ee49c90f60be5248a" | |
| }, | |
| { | |
| "dataPath": "params_shard_11.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28356608, | |
| "records": [ | |
| { | |
| "name": "transformer.h.9.mlp.fc2.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.fc2.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.10.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9441280 | |
| }, | |
| { | |
| "name": "transformer.h.10.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "transformer.h.10.mixer.Wqkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "transformer.h.10.mixer.Wqkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 15740928 | |
| }, | |
| { | |
| "name": "transformer.h.10.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16527360 | |
| }, | |
| { | |
| "name": "transformer.h.10.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 16539648 | |
| }, | |
| { | |
| "name": "transformer.h.10.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 18636800 | |
| }, | |
| { | |
| "name": "transformer.h.10.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.fc1.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 18903040 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.fc1.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 27291648 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 28340224 | |
| } | |
| ], | |
| "md5sum": "0ce4cea7a4c219df61fa52d922cc714a" | |
| }, | |
| { | |
| "dataPath": "params_shard_12.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28356608, | |
| "records": [ | |
| { | |
| "name": "transformer.h.10.mlp.fc2.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.fc2.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.11.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9441280 | |
| }, | |
| { | |
| "name": "transformer.h.11.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "transformer.h.11.mixer.Wqkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "transformer.h.11.mixer.Wqkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 15740928 | |
| }, | |
| { | |
| "name": "transformer.h.11.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16527360 | |
| }, | |
| { | |
| "name": "transformer.h.11.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 16539648 | |
| }, | |
| { | |
| "name": "transformer.h.11.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 18636800 | |
| }, | |
| { | |
| "name": "transformer.h.11.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.fc1.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 18903040 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.fc1.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 27291648 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 28340224 | |
| } | |
| ], | |
| "md5sum": "3e5d2f55f35058805f6b4a0ab0b3b33e" | |
| }, | |
| { | |
| "dataPath": "params_shard_13.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28356608, | |
| "records": [ | |
| { | |
| "name": "transformer.h.11.mlp.fc2.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.fc2.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.12.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9441280 | |
| }, | |
| { | |
| "name": "transformer.h.12.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "transformer.h.12.mixer.Wqkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "transformer.h.12.mixer.Wqkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 15740928 | |
| }, | |
| { | |
| "name": "transformer.h.12.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16527360 | |
| }, | |
| { | |
| "name": "transformer.h.12.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 16539648 | |
| }, | |
| { | |
| "name": "transformer.h.12.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 18636800 | |
| }, | |
| { | |
| "name": "transformer.h.12.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.fc1.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 18903040 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.fc1.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 27291648 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 28340224 | |
| } | |
| ], | |
| "md5sum": "a58f5e56ec3d3047a2f20910833a80bf" | |
| }, | |
| { | |
| "dataPath": "params_shard_14.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28356608, | |
| "records": [ | |
| { | |
| "name": "transformer.h.12.mlp.fc2.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.fc2.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.13.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9441280 | |
| }, | |
| { | |
| "name": "transformer.h.13.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "transformer.h.13.mixer.Wqkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "transformer.h.13.mixer.Wqkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 15740928 | |
| }, | |
| { | |
| "name": "transformer.h.13.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16527360 | |
| }, | |
| { | |
| "name": "transformer.h.13.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 16539648 | |
| }, | |
| { | |
| "name": "transformer.h.13.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 18636800 | |
| }, | |
| { | |
| "name": "transformer.h.13.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.13.mlp.fc1.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 18903040 | |
| }, | |
| { | |
| "name": "transformer.h.13.mlp.fc1.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 27291648 | |
| }, | |
| { | |
| "name": "transformer.h.13.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 28340224 | |
| } | |
| ], | |
| "md5sum": "e53dd27272d22ad8746d7073f2c0db75" | |
| }, | |
| { | |
| "dataPath": "params_shard_15.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28356608, | |
| "records": [ | |
| { | |
| "name": "transformer.h.13.mlp.fc2.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.13.mlp.fc2.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.13.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.14.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9441280 | |
| }, | |
| { | |
| "name": "transformer.h.14.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "transformer.h.14.mixer.Wqkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "transformer.h.14.mixer.Wqkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 15740928 | |
| }, | |
| { | |
| "name": "transformer.h.14.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16527360 | |
| }, | |
| { | |
| "name": "transformer.h.14.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 16539648 | |
| }, | |
| { | |
| "name": "transformer.h.14.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 18636800 | |
| }, | |
| { | |
| "name": "transformer.h.14.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.14.mlp.fc1.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 18903040 | |
| }, | |
| { | |
| "name": "transformer.h.14.mlp.fc1.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 27291648 | |
| }, | |
| { | |
| "name": "transformer.h.14.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 28340224 | |
| } | |
| ], | |
| "md5sum": "408b5639110fd4f487b9736ae163303c" | |
| }, | |
| { | |
| "dataPath": "params_shard_16.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28356608, | |
| "records": [ | |
| { | |
| "name": "transformer.h.14.mlp.fc2.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.14.mlp.fc2.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.14.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.15.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9441280 | |
| }, | |
| { | |
| "name": "transformer.h.15.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "transformer.h.15.mixer.Wqkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "transformer.h.15.mixer.Wqkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 15740928 | |
| }, | |
| { | |
| "name": "transformer.h.15.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16527360 | |
| }, | |
| { | |
| "name": "transformer.h.15.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 16539648 | |
| }, | |
| { | |
| "name": "transformer.h.15.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 18636800 | |
| }, | |
| { | |
| "name": "transformer.h.15.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.fc1.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 18903040 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.fc1.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 27291648 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 28340224 | |
| } | |
| ], | |
| "md5sum": "bcb5adb8d28f3090005463ecae311df7" | |
| }, | |
| { | |
| "dataPath": "params_shard_17.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28356608, | |
| "records": [ | |
| { | |
| "name": "transformer.h.15.mlp.fc2.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.fc2.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.16.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9441280 | |
| }, | |
| { | |
| "name": "transformer.h.16.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "transformer.h.16.mixer.Wqkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "transformer.h.16.mixer.Wqkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 15740928 | |
| }, | |
| { | |
| "name": "transformer.h.16.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16527360 | |
| }, | |
| { | |
| "name": "transformer.h.16.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 16539648 | |
| }, | |
| { | |
| "name": "transformer.h.16.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 18636800 | |
| }, | |
| { | |
| "name": "transformer.h.16.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.16.mlp.fc1.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 18903040 | |
| }, | |
| { | |
| "name": "transformer.h.16.mlp.fc1.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 27291648 | |
| }, | |
| { | |
| "name": "transformer.h.16.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 28340224 | |
| } | |
| ], | |
| "md5sum": "fabbb1bd044446a45e2b3daf5b81244d" | |
| }, | |
| { | |
| "dataPath": "params_shard_18.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28356608, | |
| "records": [ | |
| { | |
| "name": "transformer.h.16.mlp.fc2.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.16.mlp.fc2.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.16.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.17.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9441280 | |
| }, | |
| { | |
| "name": "transformer.h.17.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "transformer.h.17.mixer.Wqkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "transformer.h.17.mixer.Wqkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 15740928 | |
| }, | |
| { | |
| "name": "transformer.h.17.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16527360 | |
| }, | |
| { | |
| "name": "transformer.h.17.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 16539648 | |
| }, | |
| { | |
| "name": "transformer.h.17.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 18636800 | |
| }, | |
| { | |
| "name": "transformer.h.17.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.17.mlp.fc1.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 18903040 | |
| }, | |
| { | |
| "name": "transformer.h.17.mlp.fc1.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 27291648 | |
| }, | |
| { | |
| "name": "transformer.h.17.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 28340224 | |
| } | |
| ], | |
| "md5sum": "230582bc0fe650cff3b88b5a363b4dee" | |
| }, | |
| { | |
| "dataPath": "params_shard_19.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28356608, | |
| "records": [ | |
| { | |
| "name": "transformer.h.17.mlp.fc2.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.17.mlp.fc2.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.17.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.18.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9441280 | |
| }, | |
| { | |
| "name": "transformer.h.18.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "transformer.h.18.mixer.Wqkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "transformer.h.18.mixer.Wqkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 15740928 | |
| }, | |
| { | |
| "name": "transformer.h.18.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16527360 | |
| }, | |
| { | |
| "name": "transformer.h.18.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 16539648 | |
| }, | |
| { | |
| "name": "transformer.h.18.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 18636800 | |
| }, | |
| { | |
| "name": "transformer.h.18.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.fc1.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 18903040 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.fc1.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 27291648 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 28340224 | |
| } | |
| ], | |
| "md5sum": "6538665f514523c492c5037fe2eb092d" | |
| }, | |
| { | |
| "dataPath": "params_shard_20.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28356608, | |
| "records": [ | |
| { | |
| "name": "transformer.h.18.mlp.fc2.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.fc2.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.19.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9441280 | |
| }, | |
| { | |
| "name": "transformer.h.19.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "transformer.h.19.mixer.Wqkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "transformer.h.19.mixer.Wqkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 15740928 | |
| }, | |
| { | |
| "name": "transformer.h.19.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16527360 | |
| }, | |
| { | |
| "name": "transformer.h.19.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 16539648 | |
| }, | |
| { | |
| "name": "transformer.h.19.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 18636800 | |
| }, | |
| { | |
| "name": "transformer.h.19.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.19.mlp.fc1.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 18903040 | |
| }, | |
| { | |
| "name": "transformer.h.19.mlp.fc1.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 27291648 | |
| }, | |
| { | |
| "name": "transformer.h.19.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 28340224 | |
| } | |
| ], | |
| "md5sum": "2e3b2187339c9cbf714218f9d3a2caf5" | |
| }, | |
| { | |
| "dataPath": "params_shard_21.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28356608, | |
| "records": [ | |
| { | |
| "name": "transformer.h.19.mlp.fc2.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.19.mlp.fc2.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.19.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.20.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9441280 | |
| }, | |
| { | |
| "name": "transformer.h.20.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "transformer.h.20.mixer.Wqkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "transformer.h.20.mixer.Wqkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 15740928 | |
| }, | |
| { | |
| "name": "transformer.h.20.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16527360 | |
| }, | |
| { | |
| "name": "transformer.h.20.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 16539648 | |
| }, | |
| { | |
| "name": "transformer.h.20.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 18636800 | |
| }, | |
| { | |
| "name": "transformer.h.20.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.fc1.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 18903040 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.fc1.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 27291648 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 28340224 | |
| } | |
| ], | |
| "md5sum": "0ec0822d742f58f4d19f2a39460c1c71" | |
| }, | |
| { | |
| "dataPath": "params_shard_22.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28356608, | |
| "records": [ | |
| { | |
| "name": "transformer.h.20.mlp.fc2.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.fc2.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.21.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9441280 | |
| }, | |
| { | |
| "name": "transformer.h.21.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "transformer.h.21.mixer.Wqkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "transformer.h.21.mixer.Wqkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 15740928 | |
| }, | |
| { | |
| "name": "transformer.h.21.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16527360 | |
| }, | |
| { | |
| "name": "transformer.h.21.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 16539648 | |
| }, | |
| { | |
| "name": "transformer.h.21.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 18636800 | |
| }, | |
| { | |
| "name": "transformer.h.21.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.fc1.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 18903040 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.fc1.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 27291648 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 28340224 | |
| } | |
| ], | |
| "md5sum": "015b6bea73e884208bb568bffedc3f9f" | |
| }, | |
| { | |
| "dataPath": "params_shard_23.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28356608, | |
| "records": [ | |
| { | |
| "name": "transformer.h.21.mlp.fc2.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.fc2.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.22.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9441280 | |
| }, | |
| { | |
| "name": "transformer.h.22.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "transformer.h.22.mixer.Wqkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "transformer.h.22.mixer.Wqkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 15740928 | |
| }, | |
| { | |
| "name": "transformer.h.22.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16527360 | |
| }, | |
| { | |
| "name": "transformer.h.22.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 16539648 | |
| }, | |
| { | |
| "name": "transformer.h.22.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 18636800 | |
| }, | |
| { | |
| "name": "transformer.h.22.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.22.mlp.fc1.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 18903040 | |
| }, | |
| { | |
| "name": "transformer.h.22.mlp.fc1.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 27291648 | |
| }, | |
| { | |
| "name": "transformer.h.22.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 28340224 | |
| } | |
| ], | |
| "md5sum": "f1ca94839099428d42046796a91b1f37" | |
| }, | |
| { | |
| "dataPath": "params_shard_24.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28356608, | |
| "records": [ | |
| { | |
| "name": "transformer.h.22.mlp.fc2.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.22.mlp.fc2.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.22.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.23.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9441280 | |
| }, | |
| { | |
| "name": "transformer.h.23.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "transformer.h.23.mixer.Wqkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "transformer.h.23.mixer.Wqkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 15740928 | |
| }, | |
| { | |
| "name": "transformer.h.23.mixer.Wqkv.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 16527360 | |
| }, | |
| { | |
| "name": "transformer.h.23.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 16539648 | |
| }, | |
| { | |
| "name": "transformer.h.23.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 18636800 | |
| }, | |
| { | |
| "name": "transformer.h.23.mixer.out_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.23.mlp.fc1.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 18903040 | |
| }, | |
| { | |
| "name": "transformer.h.23.mlp.fc1.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 27291648 | |
| }, | |
| { | |
| "name": "transformer.h.23.mlp.fc1.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 28340224 | |
| } | |
| ], | |
| "md5sum": "3f30bffc08d16b8936298c07da47632f" | |
| }, | |
| { | |
| "dataPath": "params_shard_25.bin", | |
| "format": "raw-shard", | |
| "nbytes": 52428800, | |
| "records": [ | |
| { | |
| "name": "lm_head.linear.q_weight", | |
| "shape": [ | |
| 51200, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 52428800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b5c7458234d726812e45275561d6d3f1" | |
| }, | |
| { | |
| "dataPath": "params_shard_26.bin", | |
| "format": "raw-shard", | |
| "nbytes": 16105472, | |
| "records": [ | |
| { | |
| "name": "transformer.h.23.mlp.fc2.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.23.mlp.fc2.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.23.mlp.fc2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "lm_head.ln.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9441280 | |
| }, | |
| { | |
| "name": "lm_head.ln.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "lm_head.linear.q_scale", | |
| "shape": [ | |
| 51200, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6553600, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "lm_head.linear.bias", | |
| "shape": [ | |
| 51200 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 102400, | |
| "byteOffset": 16003072 | |
| } | |
| ], | |
| "md5sum": "693188db1d3398a7923eb9a4ec2c9660" | |
| } | |
| ] | |
| } |