{ "metadata": { "ParamSize": 455, "ParamBytes": 1434695680.0, "BitsPerParam": 4.071494604849007 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "compressed-shard", "nbytes": 65536000, "records": [ { "name": "lm_head.linear.q_weight", "shape": [ 51200, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65536000, "byteOffset": 0 } ], "md5sum": "a73cac9260e7c5d17d7aed586120232e" }, { "dataPath": "params_shard_1.bin", "format": "compressed-shard", "nbytes": 29245440, "records": [ { "name": "lm_head.linear.bias", "shape": [ 51200 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 0 }, { "name": "lm_head.linear.q_scale", "shape": [ 51200, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048000, "byteOffset": 102400 }, { "name": "lm_head.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 2150400 }, { "name": "lm_head.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 2155520 }, { "name": "transformer.h.29.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2160640 }, { "name": "transformer.h.29.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2181120 }, { "name": "transformer.h.29.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 15288320 }, { "name": "transformer.h.29.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 15697920 }, { "name": "transformer.h.29.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 15703040 }, { "name": "transformer.h.29.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 28810240 }, { "name": "transformer.h.30.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29219840 }, { "name": "transformer.h.30.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29224960 }, { "name": "transformer.h.30.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29230080 } ], "md5sum": "88311e19c77cf44d07042f94adbc142d" }, { "dataPath": "params_shard_2.bin", "format": "compressed-shard", "nbytes": 27064320, "records": [ { "name": "transformer.h.30.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.30.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 9830400 }, { "name": "transformer.h.30.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 10137600 }, { "name": "transformer.h.30.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 10142720 }, { "name": "transformer.h.30.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 13419520 }, { "name": "transformer.h.30.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13521920 }, { "name": "transformer.h.30.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13542400 }, { "name": "transformer.h.30.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26649600 }, { "name": "transformer.h.30.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27059200 } ], "md5sum": "a304b8aa753047be60a49711a23968c2" }, { "dataPath": "params_shard_3.bin", "format": "compressed-shard", "nbytes": 27084800, "records": [ { "name": "transformer.h.30.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.30.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.31.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.31.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13521920 }, { "name": "transformer.h.31.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13527040 }, { "name": "transformer.h.31.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13542400 }, { "name": "transformer.h.31.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 23372800 }, { "name": "transformer.h.31.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23680000 }, { "name": "transformer.h.31.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 23685120 }, { "name": "transformer.h.31.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 26961920 }, { "name": "transformer.h.31.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 27064320 } ], "md5sum": "51c8dfe62415b0f6cfe6089817463bbc" }, { "dataPath": "params_shard_4.bin", "format": "compressed-shard", "nbytes": 65536000, "records": [ { "name": "transformer.embd.q_weight", "shape": [ 51200, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65536000, "byteOffset": 0 } ], "md5sum": "32667e1167ee192db5825eef5ac5469b" }, { "dataPath": "params_shard_5.bin", "format": "compressed-shard", "nbytes": 29112320, "records": [ { "name": "transformer.h.31.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.31.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.31.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.31.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13521920 }, { "name": "transformer.h.31.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26629120 }, { "name": "transformer.embd.q_scale", "shape": [ 51200, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048000, "byteOffset": 27038720 }, { "name": "transformer.h.0.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29086720 }, { "name": "transformer.h.0.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29091840 }, { "name": "transformer.h.0.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29096960 } ], "md5sum": "724bec5ab187feddbe33456cfe46b86d" }, { "dataPath": "params_shard_6.bin", "format": "compressed-shard", "nbytes": 27064320, "records": [ { "name": "transformer.h.0.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.0.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 9830400 }, { "name": "transformer.h.0.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 10137600 }, { "name": "transformer.h.0.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 10142720 }, { "name": "transformer.h.0.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 13419520 }, { "name": "transformer.h.0.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13521920 }, { "name": "transformer.h.0.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13542400 }, { "name": "transformer.h.0.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26649600 }, { "name": "transformer.h.0.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27059200 } ], "md5sum": "d016715eeaf1a0b6c769a7325d714610" }, { "dataPath": "params_shard_7.bin", "format": "compressed-shard", "nbytes": 27084800, "records": [ { "name": "transformer.h.0.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.0.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.1.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.1.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13521920 }, { "name": "transformer.h.1.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13527040 }, { "name": "transformer.h.1.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13542400 }, { "name": "transformer.h.1.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 23372800 }, { "name": "transformer.h.1.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23680000 }, { "name": "transformer.h.1.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 23685120 }, { "name": "transformer.h.1.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 26961920 }, { "name": "transformer.h.1.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 27064320 } ], "md5sum": "abed616141072c7aafb2c1049cba4fbe" }, { "dataPath": "params_shard_8.bin", "format": "compressed-shard", "nbytes": 27064320, "records": [ { "name": "transformer.h.1.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.1.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.1.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.1.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13521920 }, { "name": "transformer.h.1.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26629120 }, { "name": "transformer.h.10.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27038720 }, { "name": "transformer.h.10.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27043840 }, { "name": "transformer.h.10.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 27048960 } ], "md5sum": "9ece92a8ca2c3663810654e03acbff97" }, { "dataPath": "params_shard_9.bin", "format": "compressed-shard", "nbytes": 27064320, "records": [ { "name": "transformer.h.10.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.10.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 9830400 }, { "name": "transformer.h.10.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 10137600 }, { "name": "transformer.h.10.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 10142720 }, { "name": "transformer.h.10.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 13419520 }, { "name": "transformer.h.10.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13521920 }, { "name": "transformer.h.10.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13542400 }, { "name": "transformer.h.10.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26649600 }, { "name": "transformer.h.10.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27059200 } ], "md5sum": "25bf3a4cc907a973612577f7bd9afb3b" }, { "dataPath": "params_shard_10.bin", "format": "compressed-shard", "nbytes": 27084800, "records": [ { "name": "transformer.h.10.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.10.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.11.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.11.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13521920 }, { "name": "transformer.h.11.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13527040 }, { "name": "transformer.h.11.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13542400 }, { "name": "transformer.h.11.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 23372800 }, { "name": "transformer.h.11.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23680000 }, { "name": "transformer.h.11.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 23685120 }, { "name": "transformer.h.11.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 26961920 }, { "name": "transformer.h.11.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 27064320 } ], "md5sum": "cfdaeb81cbf5529631676b5790f33cf8" }, { "dataPath": "params_shard_11.bin", "format": "compressed-shard", "nbytes": 27064320, "records": [ { "name": "transformer.h.11.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.11.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.11.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.11.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13521920 }, { "name": "transformer.h.11.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26629120 }, { "name": "transformer.h.12.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27038720 }, { "name": "transformer.h.12.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27043840 }, { "name": "transformer.h.12.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 27048960 } ], "md5sum": "1a634af73e7e244a94f334a09c2de4e4" }, { "dataPath": "params_shard_12.bin", "format": "compressed-shard", "nbytes": 27064320, "records": [ { "name": "transformer.h.12.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.12.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 9830400 }, { "name": "transformer.h.12.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 10137600 }, { "name": "transformer.h.12.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 10142720 }, { "name": "transformer.h.12.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 13419520 }, { "name": "transformer.h.12.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13521920 }, { "name": "transformer.h.12.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13542400 }, { "name": "transformer.h.12.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26649600 }, { "name": "transformer.h.12.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27059200 } ], "md5sum": "de784764bb7867b232fd3a8b88e9356e" }, { "dataPath": "params_shard_13.bin", "format": "compressed-shard", "nbytes": 27084800, "records": [ { "name": "transformer.h.12.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.12.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.13.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.13.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13521920 }, { "name": "transformer.h.13.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13527040 }, { "name": "transformer.h.13.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13542400 }, { "name": "transformer.h.13.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 23372800 }, { "name": "transformer.h.13.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23680000 }, { "name": "transformer.h.13.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 23685120 }, { "name": "transformer.h.13.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 26961920 }, { "name": "transformer.h.13.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 27064320 } ], "md5sum": "55638a64518a20cbcccd528d0008713e" }, { "dataPath": "params_shard_14.bin", "format": "compressed-shard", "nbytes": 27064320, "records": [ { "name": "transformer.h.13.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.13.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.13.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.13.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13521920 }, { "name": "transformer.h.13.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26629120 }, { "name": "transformer.h.14.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27038720 }, { "name": "transformer.h.14.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27043840 }, { "name": "transformer.h.14.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 27048960 } ], "md5sum": "78182df8d3bf4c6861809246fa03a733" }, { "dataPath": "params_shard_15.bin", "format": "compressed-shard", "nbytes": 27064320, "records": [ { "name": "transformer.h.14.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.14.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 9830400 }, { "name": "transformer.h.14.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 10137600 }, { "name": "transformer.h.14.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 10142720 }, { "name": "transformer.h.14.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 13419520 }, { "name": "transformer.h.14.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13521920 }, { "name": "transformer.h.14.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13542400 }, { "name": "transformer.h.14.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26649600 }, { "name": "transformer.h.14.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27059200 } ], "md5sum": "48a4edf6be2d8e4c9c860d76a96311a8" }, { "dataPath": "params_shard_16.bin", "format": "compressed-shard", "nbytes": 27084800, "records": [ { "name": "transformer.h.14.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.14.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.15.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.15.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13521920 }, { "name": "transformer.h.15.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13527040 }, { "name": "transformer.h.15.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13542400 }, { "name": "transformer.h.15.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 23372800 }, { "name": "transformer.h.15.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23680000 }, { "name": "transformer.h.15.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 23685120 }, { "name": "transformer.h.15.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 26961920 }, { "name": "transformer.h.15.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 27064320 } ], "md5sum": "c16e2e07b17806cac8fe7665b7fc2d99" }, { "dataPath": "params_shard_17.bin", "format": "compressed-shard", "nbytes": 27064320, "records": [ { "name": "transformer.h.15.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.15.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.15.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.15.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13521920 }, { "name": "transformer.h.15.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26629120 }, { "name": "transformer.h.16.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27038720 }, { "name": "transformer.h.16.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27043840 }, { "name": "transformer.h.16.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 27048960 } ], "md5sum": "0b52419528c2161bbfa5d740f4b30830" }, { "dataPath": "params_shard_18.bin", "format": "compressed-shard", "nbytes": 27064320, "records": [ { "name": "transformer.h.16.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.16.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 9830400 }, { "name": "transformer.h.16.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 10137600 }, { "name": "transformer.h.16.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 10142720 }, { "name": "transformer.h.16.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 13419520 }, { "name": "transformer.h.16.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13521920 }, { "name": "transformer.h.16.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13542400 }, { "name": "transformer.h.16.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26649600 }, { "name": "transformer.h.16.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27059200 } ], "md5sum": "1d16d28ab5abfa96262e7dd2824226a9" }, { "dataPath": "params_shard_19.bin", "format": "compressed-shard", "nbytes": 27084800, "records": [ { "name": "transformer.h.16.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.16.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.17.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.17.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13521920 }, { "name": "transformer.h.17.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13527040 }, { "name": "transformer.h.17.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13542400 }, { "name": "transformer.h.17.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 23372800 }, { "name": "transformer.h.17.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23680000 }, { "name": "transformer.h.17.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 23685120 }, { "name": "transformer.h.17.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 26961920 }, { "name": "transformer.h.17.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 27064320 } ], "md5sum": "88cdbaafb06e60331b8ca376c100bf95" }, { "dataPath": "params_shard_20.bin", "format": "compressed-shard", "nbytes": 27064320, "records": [ { "name": "transformer.h.17.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.17.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.17.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.17.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13521920 }, { "name": "transformer.h.17.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26629120 }, { "name": "transformer.h.18.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27038720 }, { "name": "transformer.h.18.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27043840 }, { "name": "transformer.h.18.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 27048960 } ], "md5sum": "f53f7b6ddc7e9d9989c0a829714b190b" }, { "dataPath": "params_shard_21.bin", "format": "compressed-shard", "nbytes": 27064320, "records": [ { "name": "transformer.h.18.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.18.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 9830400 }, { "name": "transformer.h.18.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 10137600 }, { "name": "transformer.h.18.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 10142720 }, { "name": "transformer.h.18.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 13419520 }, { "name": "transformer.h.18.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13521920 }, { "name": "transformer.h.18.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13542400 }, { "name": "transformer.h.18.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26649600 }, { "name": "transformer.h.18.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27059200 } ], "md5sum": "7dc85a665f90f635f7f8b5c281f22b41" }, { "dataPath": "params_shard_22.bin", "format": "compressed-shard", "nbytes": 27084800, "records": [ { "name": "transformer.h.18.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.18.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.19.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.19.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13521920 }, { "name": "transformer.h.19.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13527040 }, { "name": "transformer.h.19.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13542400 }, { "name": "transformer.h.19.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 23372800 }, { "name": "transformer.h.19.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23680000 }, { "name": "transformer.h.19.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 23685120 }, { "name": "transformer.h.19.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 26961920 }, { "name": "transformer.h.19.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 27064320 } ], "md5sum": "1f3ddde9aed5920d18b063be52cf0e35" }, { "dataPath": "params_shard_23.bin", "format": "compressed-shard", "nbytes": 27064320, "records": [ { "name": "transformer.h.19.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.19.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.19.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.19.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13521920 }, { "name": "transformer.h.19.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26629120 }, { "name": "transformer.h.2.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27038720 }, { "name": "transformer.h.2.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27043840 }, { "name": "transformer.h.2.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 27048960 } ], "md5sum": "8f130ec450440fbbc7ab666d9f2399f7" }, { "dataPath": "params_shard_24.bin", "format": "compressed-shard", "nbytes": 27064320, "records": [ { "name": "transformer.h.2.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.2.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 9830400 }, { "name": "transformer.h.2.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 10137600 }, { "name": "transformer.h.2.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 10142720 }, { "name": "transformer.h.2.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 13419520 }, { "name": "transformer.h.2.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13521920 }, { "name": "transformer.h.2.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13542400 }, { "name": "transformer.h.2.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26649600 }, { "name": "transformer.h.2.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27059200 } ], "md5sum": "8c49531730352ca09a1cbf61dae331af" }, { "dataPath": "params_shard_25.bin", "format": "compressed-shard", "nbytes": 27084800, "records": [ { "name": "transformer.h.2.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.2.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.20.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.20.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13521920 }, { "name": "transformer.h.20.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13527040 }, { "name": "transformer.h.20.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13542400 }, { "name": "transformer.h.20.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 23372800 }, { "name": "transformer.h.20.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23680000 }, { "name": "transformer.h.20.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 23685120 }, { "name": "transformer.h.20.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 26961920 }, { "name": "transformer.h.20.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 27064320 } ], "md5sum": "a8da6870326894484d811565d4f986af" }, { "dataPath": "params_shard_26.bin", "format": "compressed-shard", "nbytes": 27064320, "records": [ { "name": "transformer.h.20.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.20.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.20.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.20.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13521920 }, { "name": "transformer.h.20.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26629120 }, { "name": "transformer.h.21.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27038720 }, { "name": "transformer.h.21.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27043840 }, { "name": "transformer.h.21.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 27048960 } ], "md5sum": "d3d7ae3df32c24ea975dfa61abab4ec3" }, { "dataPath": "params_shard_27.bin", "format": "compressed-shard", "nbytes": 27064320, "records": [ { "name": "transformer.h.21.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.21.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 9830400 }, { "name": "transformer.h.21.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 10137600 }, { "name": "transformer.h.21.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 10142720 }, { "name": "transformer.h.21.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 13419520 }, { "name": "transformer.h.21.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13521920 }, { "name": "transformer.h.21.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13542400 }, { "name": "transformer.h.21.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26649600 }, { "name": "transformer.h.21.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27059200 } ], "md5sum": "a3113124027df13e27f1cb9ba06af4ae" }, { "dataPath": "params_shard_28.bin", "format": "compressed-shard", "nbytes": 27084800, "records": [ { "name": "transformer.h.21.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.21.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.22.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.22.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13521920 }, { "name": "transformer.h.22.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13527040 }, { "name": "transformer.h.22.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13542400 }, { "name": "transformer.h.22.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 23372800 }, { "name": "transformer.h.22.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23680000 }, { "name": "transformer.h.22.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 23685120 }, { "name": "transformer.h.22.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 26961920 }, { "name": "transformer.h.22.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 27064320 } ], "md5sum": "6b32e527dcd1775d957dc5a89657fe7c" }, { "dataPath": "params_shard_29.bin", "format": "compressed-shard", "nbytes": 27064320, "records": [ { "name": "transformer.h.22.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.22.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.22.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.22.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13521920 }, { "name": "transformer.h.22.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26629120 }, { "name": "transformer.h.23.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27038720 }, { "name": "transformer.h.23.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27043840 }, { "name": "transformer.h.23.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 27048960 } ], "md5sum": "0043c8bf636bd38a3fa7e9930ac59588" }, { "dataPath": "params_shard_30.bin", "format": "compressed-shard", "nbytes": 27064320, "records": [ { "name": "transformer.h.23.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.23.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 9830400 }, { "name": "transformer.h.23.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 10137600 }, { "name": "transformer.h.23.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 10142720 }, { "name": "transformer.h.23.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 13419520 }, { "name": "transformer.h.23.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13521920 }, { "name": "transformer.h.23.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13542400 }, { "name": "transformer.h.23.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26649600 }, { "name": "transformer.h.23.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27059200 } ], "md5sum": "2dd32125527a193638c32609831a9cc6" }, { "dataPath": "params_shard_31.bin", "format": "compressed-shard", "nbytes": 27084800, "records": [ { "name": "transformer.h.23.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.23.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.24.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.24.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13521920 }, { "name": "transformer.h.24.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13527040 }, { "name": "transformer.h.24.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13542400 }, { "name": "transformer.h.24.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 23372800 }, { "name": "transformer.h.24.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23680000 }, { "name": "transformer.h.24.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 23685120 }, { "name": "transformer.h.24.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 26961920 }, { "name": "transformer.h.24.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 27064320 } ], "md5sum": "fcbfd1401c602796274ff100a578d460" }, { "dataPath": "params_shard_32.bin", "format": "compressed-shard", "nbytes": 27064320, "records": [ { "name": "transformer.h.24.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.24.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.24.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.24.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13521920 }, { "name": "transformer.h.24.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26629120 }, { "name": "transformer.h.25.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27038720 }, { "name": "transformer.h.25.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27043840 }, { "name": "transformer.h.25.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 27048960 } ], "md5sum": "1229daceea3891d9a7c17bca5fd72ec1" }, { "dataPath": "params_shard_33.bin", "format": "compressed-shard", "nbytes": 27064320, "records": [ { "name": "transformer.h.25.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.25.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 9830400 }, { "name": "transformer.h.25.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 10137600 }, { "name": "transformer.h.25.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 10142720 }, { "name": "transformer.h.25.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 13419520 }, { "name": "transformer.h.25.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13521920 }, { "name": "transformer.h.25.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13542400 }, { "name": "transformer.h.25.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26649600 }, { "name": "transformer.h.25.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27059200 } ], "md5sum": "101d8edc8cabf30ab764f669abbddb52" }, { "dataPath": "params_shard_34.bin", "format": "compressed-shard", "nbytes": 27084800, "records": [ { "name": "transformer.h.25.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.25.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.26.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.26.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13521920 }, { "name": "transformer.h.26.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13527040 }, { "name": "transformer.h.26.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13542400 }, { "name": "transformer.h.26.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 23372800 }, { "name": "transformer.h.26.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23680000 }, { "name": "transformer.h.26.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 23685120 }, { "name": "transformer.h.26.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 26961920 }, { "name": "transformer.h.26.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 27064320 } ], "md5sum": "37d6c630a5d9f5bdf1c80a0b13fd1457" }, { "dataPath": "params_shard_35.bin", "format": "compressed-shard", "nbytes": 27064320, "records": [ { "name": "transformer.h.26.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.26.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.26.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.26.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13521920 }, { "name": "transformer.h.26.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26629120 }, { "name": "transformer.h.27.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27038720 }, { "name": "transformer.h.27.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27043840 }, { "name": "transformer.h.27.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 27048960 } ], "md5sum": "67b23bb4ac10e458fac54d6c091803ed" }, { "dataPath": "params_shard_36.bin", "format": "compressed-shard", "nbytes": 27064320, "records": [ { "name": "transformer.h.27.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.27.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 9830400 }, { "name": "transformer.h.27.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 10137600 }, { "name": "transformer.h.27.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 10142720 }, { "name": "transformer.h.27.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 13419520 }, { "name": "transformer.h.27.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13521920 }, { "name": "transformer.h.27.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13542400 }, { "name": "transformer.h.27.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26649600 }, { "name": "transformer.h.27.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27059200 } ], "md5sum": "adabdb0ae2416e3fcc04461f54e4101a" }, { "dataPath": "params_shard_37.bin", "format": "compressed-shard", "nbytes": 27084800, "records": [ { "name": "transformer.h.27.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.27.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.28.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.28.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13521920 }, { "name": "transformer.h.28.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13527040 }, { "name": "transformer.h.28.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13542400 }, { "name": "transformer.h.28.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 23372800 }, { "name": "transformer.h.28.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23680000 }, { "name": "transformer.h.28.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 23685120 }, { "name": "transformer.h.28.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 26961920 }, { "name": "transformer.h.28.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 27064320 } ], "md5sum": "3c0b04badb26de6b10c3bf84a9ed6377" }, { "dataPath": "params_shard_38.bin", "format": "compressed-shard", "nbytes": 27064320, "records": [ { "name": "transformer.h.28.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.28.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.28.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.28.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13521920 }, { "name": "transformer.h.28.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26629120 }, { "name": "transformer.h.29.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27038720 }, { "name": "transformer.h.29.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27043840 }, { "name": "transformer.h.29.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 27048960 } ], "md5sum": "83c88ae98a2f64ddf18b837428e6f486" }, { "dataPath": "params_shard_39.bin", "format": "compressed-shard", "nbytes": 27089920, "records": [ { "name": "transformer.h.29.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.29.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 9830400 }, { "name": "transformer.h.29.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 10137600 }, { "name": "transformer.h.29.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 10142720 }, { "name": "transformer.h.29.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 13419520 }, { "name": "transformer.h.3.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13521920 }, { "name": "transformer.h.3.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13527040 }, { "name": "transformer.h.3.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13532160 }, { "name": "transformer.h.3.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13547520 }, { "name": "transformer.h.3.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 23377920 }, { "name": "transformer.h.3.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23685120 }, { "name": "transformer.h.3.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 23690240 }, { "name": "transformer.h.3.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 26967040 }, { "name": "transformer.h.3.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 27069440 } ], "md5sum": "44946933c30cb6d03d4cf47685c4543a" }, { "dataPath": "params_shard_40.bin", "format": "compressed-shard", "nbytes": 27064320, "records": [ { "name": "transformer.h.3.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.3.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.3.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.3.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13521920 }, { "name": "transformer.h.3.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26629120 }, { "name": "transformer.h.4.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27038720 }, { "name": "transformer.h.4.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27043840 }, { "name": "transformer.h.4.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 27048960 } ], "md5sum": "037dbefb78419ca72477dc0edd4be3f6" }, { "dataPath": "params_shard_41.bin", "format": "compressed-shard", "nbytes": 27064320, "records": [ { "name": "transformer.h.4.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.4.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 9830400 }, { "name": "transformer.h.4.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 10137600 }, { "name": "transformer.h.4.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 10142720 }, { "name": "transformer.h.4.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 13419520 }, { "name": "transformer.h.4.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13521920 }, { "name": "transformer.h.4.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13542400 }, { "name": "transformer.h.4.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26649600 }, { "name": "transformer.h.4.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27059200 } ], "md5sum": "719578224344c0728f18d6e8f55bbceb" }, { "dataPath": "params_shard_42.bin", "format": "compressed-shard", "nbytes": 27084800, "records": [ { "name": "transformer.h.4.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.4.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.5.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.5.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13521920 }, { "name": "transformer.h.5.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13527040 }, { "name": "transformer.h.5.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13542400 }, { "name": "transformer.h.5.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 23372800 }, { "name": "transformer.h.5.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23680000 }, { "name": "transformer.h.5.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 23685120 }, { "name": "transformer.h.5.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 26961920 }, { "name": "transformer.h.5.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 27064320 } ], "md5sum": "3f8d4f0e321a321690a3dfb47a8b6e86" }, { "dataPath": "params_shard_43.bin", "format": "compressed-shard", "nbytes": 27064320, "records": [ { "name": "transformer.h.5.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.5.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.5.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.5.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13521920 }, { "name": "transformer.h.5.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26629120 }, { "name": "transformer.h.6.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27038720 }, { "name": "transformer.h.6.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27043840 }, { "name": "transformer.h.6.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 27048960 } ], "md5sum": "b1406db88ea193f81707bd57346551c2" }, { "dataPath": "params_shard_44.bin", "format": "compressed-shard", "nbytes": 27064320, "records": [ { "name": "transformer.h.6.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.6.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 9830400 }, { "name": "transformer.h.6.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 10137600 }, { "name": "transformer.h.6.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 10142720 }, { "name": "transformer.h.6.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 13419520 }, { "name": "transformer.h.6.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13521920 }, { "name": "transformer.h.6.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13542400 }, { "name": "transformer.h.6.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26649600 }, { "name": "transformer.h.6.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27059200 } ], "md5sum": "d517ef6fb68c17b15b89663bab46f71b" }, { "dataPath": "params_shard_45.bin", "format": "compressed-shard", "nbytes": 27084800, "records": [ { "name": "transformer.h.6.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.6.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.7.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.7.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13521920 }, { "name": "transformer.h.7.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13527040 }, { "name": "transformer.h.7.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13542400 }, { "name": "transformer.h.7.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 23372800 }, { "name": "transformer.h.7.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23680000 }, { "name": "transformer.h.7.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 23685120 }, { "name": "transformer.h.7.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 26961920 }, { "name": "transformer.h.7.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 27064320 } ], "md5sum": "4412208e6aa0bca14aa3efe3d31be342" }, { "dataPath": "params_shard_46.bin", "format": "compressed-shard", "nbytes": 27064320, "records": [ { "name": "transformer.h.7.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.7.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.7.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.7.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13521920 }, { "name": "transformer.h.7.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26629120 }, { "name": "transformer.h.8.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27038720 }, { "name": "transformer.h.8.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27043840 }, { "name": "transformer.h.8.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 27048960 } ], "md5sum": "404c768e6b2f0c9eba570a611a64553e" }, { "dataPath": "params_shard_47.bin", "format": "compressed-shard", "nbytes": 27064320, "records": [ { "name": "transformer.h.8.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.8.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 9830400 }, { "name": "transformer.h.8.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 10137600 }, { "name": "transformer.h.8.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 10142720 }, { "name": "transformer.h.8.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 13419520 }, { "name": "transformer.h.8.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13521920 }, { "name": "transformer.h.8.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13542400 }, { "name": "transformer.h.8.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26649600 }, { "name": "transformer.h.8.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27059200 } ], "md5sum": "3594ecbee10b740a01760d2955d817bc" }, { "dataPath": "params_shard_48.bin", "format": "compressed-shard", "nbytes": 27084800, "records": [ { "name": "transformer.h.8.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.8.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.9.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.9.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13521920 }, { "name": "transformer.h.9.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13527040 }, { "name": "transformer.h.9.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13542400 }, { "name": "transformer.h.9.mixer.Wqkv.q_scale", "shape": [ 7680, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 307200, "byteOffset": 23372800 }, { "name": "transformer.h.9.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23680000 }, { "name": "transformer.h.9.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 23685120 }, { "name": "transformer.h.9.mixer.out_proj.q_scale", "shape": [ 2560, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 26961920 }, { "name": "transformer.h.9.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 27064320 } ], "md5sum": "41a7810d82d8460c10bdbcd1df81aa62" }, { "dataPath": "params_shard_49.bin", "format": "compressed-shard", "nbytes": 27038720, "records": [ { "name": "transformer.h.9.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.9.mlp.fc1.q_scale", "shape": [ 10240, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "transformer.h.9.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13516800 }, { "name": "transformer.h.9.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13521920 }, { "name": "transformer.h.9.mlp.fc2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 26629120 } ], "md5sum": "f5ecde2e980517a9ee9e5dacf37604e0" } ] }