{ "metadata": { "ParamSize": 325, "ParamBytes": 3801997312.0, "BitsPerParam": 4.065298569766646 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 98304000, "records": [ { "name": "lm_head.q_weight", "shape": [ 48000, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 98304000, "byteOffset": 0 } ], "md5sum": "92109e86dc934e9214ce842021a549ef" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 98304000, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 48000, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 98304000, "byteOffset": 0 } ], "md5sum": "cbcf488c95db8591f9fd813dde8a81e9" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d12c86b3b34ce4049ba4c4303152d7bf" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "bcfc69c2a9c009e61e121e7a70590ad9" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "78be2c8e2297642b424aad9ed802985e" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "8d2e1101f5f9058130830cf373b1fffd" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 33308672, "records": [ { "name": "lm_head.q_scale", "shape": [ 48000, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072000, "byteOffset": 0 }, { "name": "model.embed_tokens.q_scale", "shape": [ 48000, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072000, "byteOffset": 3072000 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 6144000 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 6152192 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 7069696 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8904704 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 8912896 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 21495808 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21889024 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 30277632 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30539776 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30547968 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 31465472 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 33300480 } ], "md5sum": "10402e7f0921d45549508b4527394268" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 21626880, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 } ], "md5sum": "599de350773355ed7fbe89f96d27cee5" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3e68dec7d8439d5d42dd21db24000d37" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "0786a22ffa56e70c17a06bae486d277f" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "c3895ca281578344ba3a2ca1c6d3452a" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6d7c2452b64b6f3488435ef26c89ceff" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "6b21e89d6eeead763af401199e2241d1" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "4e4710ceb7bc7b1e2f984fbe5b7933ea" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "23e1db4d98d93103073d91c17e6fa504" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "06aeae619faffc0dab46497a4cb597ab" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "1ee1ef768d03a95c88f7b4a4b24bcb8f" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "8b5a00985f73ad63814a38d4bd51e62c" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "4c93c8d6b8859cd6b3975eb2fa24c9d3" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "4899a5ebf18d116340810a0ac097e9ea" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "63e9bb07d86140ed1458da7a6d363c77" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "a4d07c11f1589721424a37825d7b4491" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "aafbc33769135c70c77e4b586b44ee6e" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "de0fea29652f6e3a550bafe749df52f7" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "ac0ffe7b7b54a0f9d588fd93d74ddc61" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "64f8da779bbc12b9a897d22673b803ca" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "79f3d8afcf511b30d588383c3c0403d9" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "07a896d5ff6b4ddc92b8bfff5c89d0c4" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "d82029aaf019154d241869e10e5c5890" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1981182826d01799352e9d665fb0822e" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "dba3eb320857dd800408e2bc93d054ee" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "ed96c8d4f5340bf755bd279cf439e214" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f281a4bfb19af11a4e25953171917074" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "b308c66aee2c2c0e88663543f76815cf" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3058f2876f75972dcfc22c6819c930d1" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "16bb9594371826e6f23e9608c35595e3" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 27164672, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24395776 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 24403968 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 25321472 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27156480 } ], "md5sum": "c7cd1c338078fc679663c94c1487c3e7" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c3f32c2fe6c289109a87433617c4eded" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "c3a7a22502b8627f09f3ca598f8cd426" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "8ebb97f1d5238a48aceca639b5ab2cfb" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "19e37e353c4aa4d9ece3a5925c11265c" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "3049b5e3751e54c651afc028c2a00e75" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "35516368a61b609a55ee3bb9096d613b" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b753f46579f2c7edfb8890328de69fda" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "ba261054cb49a590567b52aaf72de6f7" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "7106bc4c57818f744ec9ae7ee31f23f8" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "7cfb6665d1b16544c5de835dbb770230" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "4096c00e860d0d5e5498dfeb6b88ab6d" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "3c03f818153915a0ebe053ced867b356" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f17b2861ecb3f31a1173dbcc40db782b" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "6082c59bd42a69ffced6a597fa1fdd43" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "6efe8adb5762400e12c9960cece2fff1" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ed95ae1194c8efbf245d108112cbb2cb" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "e82aece79ea9c92d9850e3b84a2cf89b" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "dc8e7e85fa779145ba5fc38da89d5a1c" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ab10776431d740f6927f00f1870bfc8f" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "f166d60f72f5c23f0fe055dbe0a6398e" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "38749a9815469df135552291c6aa7f0a" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c7f239f15a79a123ffa4226520c25608" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "d8ca8fff4ed9d09cb4b58c211a813afc" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "aa5b5a89f744a005a22a0f6cf553c2fc" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "939a24b9442e6e0798fb9aac9f58c927" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "69dfb0eb340883e35601bbb7c08b85d5" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "303790ac1889698ef73c6b6f02ca9cec" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "2a61aa99831c5997d94878a3e45305b8" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 23461888, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21626880 } ], "md5sum": "ef9fc2a323b91e640450937e762fbbc4" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "daabd49c2396b641d14346d2fb775b44" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "125a871ab98008edfcc35462159c09d3" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "ffa27e7fdae04d78245891d1ce2b2cfa" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 25329664, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22552576 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22560768 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 22568960 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 23486464 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25321472 } ], "md5sum": "d81344e8cbcfe03ad68d744027ff564e" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2dd284c2624d7c878b587a7e27dc19fd" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "cce3db7ecc3dc040c1a9214c50300676" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "f709b74aa93e59154ec9fe15b27ccc7b" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c4db47f3c9eccfa72813d684e4d96a20" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "c570af77111a391978aa65146839ba5e" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "3f27bba8c96d1212f5b0e05091029d5d" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0ee002eca8a67987a35c75503fa74d2b" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "d250578f953be1efcc1d4b759d42eeaf" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "21580bc2bcf592ff83993f59e2a23e2d" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d7a3423d6df1588af73e8778087455a3" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "51fa9525e6217cf79e4159f3859ef72d" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "b8ddd55547269daa75a349600b84e487" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f4f1d5b93c124b96fe03a634c5295834" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "d00bcb7e6068c9a53920ea2e7a92267f" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "798378b2453f80ead5024b7507b5ffae" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f2079266ad6da4492d4883ed6e9b7cfd" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "0a1c97a4a14af65166546f0e238b9585" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "46281c2a777bc7e36ff803b80c04aa63" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "db646329ab26dcc02ad22aee01a9e15f" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "810134af58b76686e30481756beb56da" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "2a1a66538087fea6e7358d44da32fcae" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "858f5d245287716130bf049fa5f25bca" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "573158889fc7f1c57588a63895246d97" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "38221e990ef6c09c2ab6b64d2f3abb13" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "91020d5c1d111d400104a3e22af1c23d" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "735bb80b95e7ebbe858e2de6285f9930" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "159e23252b00b42dfe0c24b439d0bc1d" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 21635072, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 } ], "md5sum": "8beb5e4cf882a46962b38cead1e5722e" } ] }