{ "metadata": { "ParamSize": 405, "ParamBytes": 5859265536.0, "BitsPerParam": 3.6013014104119914 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 65540096, "records": [ { "name": "lm_head.q_weight", "shape": [ 32002, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65540096, "byteOffset": 0 } ], "md5sum": "405ed1383e19618ca9de16b5a07c950c" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "a58fa64488aa733bfa0c0b6dff61c52f" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "605784270aff6407173936ba328e0d45" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "dd06c00511a38ac0c041974a8903d4bf" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "fbf59071e62c576c355352f68c3deb9c" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 26329600, "records": [ { "name": "lm_head.q_scale", "shape": [ 32002, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192512, "byteOffset": 0 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8192512 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 8202752 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11745792 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11756032 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11766272 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15309312 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22387200 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22397440 } ], "md5sum": "acf8a9f03bf650d94783ee1dcdeef8cc" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "d5383e04e8e32bcdddab65e86a6d755e" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "a48c63509b3ddac9ee549eeeb08f2902" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "0aa84c2377fdb5977777b80ab0d1fa3d" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "2ad643291a166560b17471cf6f7a85ef" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "38fe08cf63f1947c46c1011229502dfc" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "51dd41b92a0659f813a9b5b433f3c65e" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "7cbe3bc78ad3e9e580e5b65271f495c2" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "49271c1c80d0ca6468078bc1403b849c" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "cffa00768cecc6d2494913449a8ecac3" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "e5aa5dd5633df6c3536c9a03edcb929b" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "86da7c6ce1e9faba69fe73e6def30838" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "ce3c18e893f8fba36010f318392c27b1" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "df9dd81037f248b7707046c293b3d2bc" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "8ee31dfcd211017fce2208f2c8803e1f" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "7ef2d6a5460d42594c78c0dda94d5083" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "45ce294c6d526904e7721976049da1fa" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "1f136d0884079fc0f82f6b52436a569d" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "2513956537056e22d19cd63f1e556f11" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "decf7db49bd46d6c73512c7d4e6dbfcd" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "6848f3119ddd895d4f9bd7e78dbc4ce1" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "84bc204e9cbbd2866ee47b7743ed016b" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "a1cfde4ebefc7ebb0cd424e88de6de69" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "b0a760907a8d4cf8f907caa2251d61e0" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "700cac2c8bff3aa823a64367125edb2f" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "3cd234034524d81f942ff0e9b9e17d3e" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "189d32c8f4d8fcdc2fbda71f867521e3" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "d12eb2e1d8537dfa3698c2260b030eda" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "091f814640fc8a97fd61b4bc68d6362c" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "f28d3a261a967b24b451c10bc5a4aa31" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "6ff0a26be500e18b0756e9c9fd456dd7" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "4effb57441d62d554c25c7788300a80d" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "e9a1d00af6e2211f42d0f2c6e932b9cd" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 65540096, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32002, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65540096, "byteOffset": 0 } ], "md5sum": "22accb9b26f672d87c2f723c1d6f7f20" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "3de776a20e7e771a00ba6ce914bcab4c" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "a8d62d234dfaa7a1d69e8d38d9a4cf29" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "112dc1dbb9515757312bcd554538e8dd" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 30640640, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.norm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.embed_tokens.q_scale", "shape": [ 32002, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192512, "byteOffset": 11806720 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 19999232 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 20009472 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 23552512 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 30630400 } ], "md5sum": "80ecb5c076651a55d6c9ec0670752b55" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "f008ebb65a98156a08250227f00003e1" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "29cdc3032a48fb58a46c609c9fb8c9d3" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "e7cf6184c101e3df8b6e593f2b507a0a" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 30302208, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 3932160 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 14417920 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 15728640 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 15738880 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 19281920 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26359808 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 26370048 } ], "md5sum": "9e6ff77845fd2b6d2af607e61c47e348" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "f7e685cb2b3400026f99d1eef65356c5" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "154d749416e6970411b6b77fe912a243" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "083519a30e0e203ba71a810b09071a6d" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "ce4db900ce509cafa51bc10607052d47" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "d106267f5e66a6003dd0632a183d9ddf" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "7dfdc2e126d4e36bba75b7da95d7d657" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "45e2432ec62115a5e3fce05c5a8b2cd6" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "7693fe69568b9c6df1f9b89177442c7b" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "b3033360a34d6088b414fbc713a7e10a" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "f281d04a1063d8643f940ad73ee0e1af" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "57f2028b43f7e959738cf4df001a9f42" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "22fc37ed88b123bbb66284efea170661" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "79a1435ec27887aee47aa41f392293a7" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "ef5934bbc7aa88e5f6206863d1abf6e4" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "03e80afa88ff14c5f2feb3bec6f2ac51" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "f17521658c466207c9d5315ff19f7507" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "7441c3e445fd2c2b10868f12ed962c6a" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "3558c73f7c67e495309f80f48a56ab04" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "16b5c1c705fa1f8c3812ca1e6e5b5e67" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "64a627d97a4fde812187e67c641c55d2" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "b9322382fbc54b0bfc9b676a2a995737" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "6223f445868a6d1cd08ffbc3fe9d8fd6" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "11e473bac8600407d7e093f00782e78e" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "2c6cd739ceac2a95454d3d0c5cfd0542" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 30302208, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 11796480 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 15728640 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 15738880 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 19281920 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26359808 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 26370048 } ], "md5sum": "a3c97c34504e1f05dffb0cf2c6b86a46" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "80d972df01961c5511c548cd3c99f830" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "55e06952bc351fc2e525319f2bedef6f" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "dbb0510f881b75c4ccc6a0dfbbb3124c" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "a527a512723bbbbd2f1cd7859bcf5d36" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "9eb4809bd54ea26a8b5126fcb117a70a" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "cba9c5fde3280bb564b45f5992f5b4e8" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "8a0411952df3088bb0afe38194bdeee3" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "6def8f3a5a7e3f74488aa106efb8fa34" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "b3b8c5634e25cb980236e2a4c1db56ca" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "cafaad0ddc27f632ef699c1d79cc0b06" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "508e5bb418a84c5ac5b70235cf413d87" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "63a8a55a4c2202fbed96d7b206faaf89" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "0c8a6bc6c1d67c613481826224d5a555" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "207003909175bbc8c716bced14aab119" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "f248ff085bc95810a1239a65367cb1ce" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "91875e6f6defa8aba88fd87ba10db520" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "522b9afa6928983564067ba5dd523764" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "95d1bf5856cc8da9232b25a5b1d00bdc" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "84e9a243252469fdd92a2d9fa0063116" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "09bec1b4895ab2131781901ce3ff8b12" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "2c988fe815af325db8db4ecfc0754f1c" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "696a534c02144c87ebcfd957c59a9fb8" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "65911230245ce194c7d2f3d2ab7ea0b2" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "ebf9a9f9bb8bb8f28aa2efe58e93be89" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "47bc71a2c231389b803863ce280b1fa8" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "305eccf926407f367f5e8c2f5c66cc60" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "9aa9df54db29fa362a6c4243ac638ea2" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "5cc276fc555fcb85b16ec1bd33f2e4eb" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "310de39c394dd583b7b02facc917b399" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "b2b35167b76d0e430119305436e8bbc9" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 32923648, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 22437888 } ], "md5sum": "6a0de316a78d80a8be9f8beecb351e82" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "48da9eeb369fee83396cab84480cd841" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 33208320, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 0 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1310720 }, { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 1320960 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 29665280 } ], "md5sum": "16d6edfcf84fbb533faeeeb1ba92d6f7" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "34be2b5dcb73e890b3ba1c9d127229d7" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "30888d852d86761771fc0c546ce6d3e9" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "3b04e7d5ae22efa395571d4479f3b027" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "219b005b06673061e8d3786868744e13" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 33458176, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 0 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 7077888 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 7088128 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 11020288 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 21506048 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22816768 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 22827008 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 26370048 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 33447936 } ], "md5sum": "8ef8dfe78958bd65cccba37070c42f31" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "2d454bdeff08158481fe07be7f4ac4bf" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "ffc9c7bd9cae526fd582aad2a36b23eb" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "5568f3b982713524bbd358e7aab3eb21" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 30302208, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 3932160 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 14417920 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 15728640 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 15738880 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 19281920 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26359808 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 26370048 } ], "md5sum": "4a0fad45f1ba4433f234388da280b142" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "061fe863d0ef8316db73196b7c2307c1" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "1ee009765f2c84a91a9cd90292246dbf" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "efe78d994b1d3b8e545f5ead45f448c1" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "73256329d2a94277eb2e9c5022ceb016" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "3730dfcd04bd604da7c23eacbd0daa9f" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "2412e5b4ff494cddd14cc6823aac60ca" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "a4b8b2d94d99a53ecdb789e23597664d" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "e6e10f6d03703f35564ad43a6158dfbf" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "48c4b8aceb7db9a75311ecd761efa704" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "e3c36754984f1d2318ecb6bd4b78457a" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "ec8f50a6f275f5c45a7c3de9aa92e287" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "6776c4b3f8bce670eb91adb1498d100f" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "a36da2763a9a56450e25227b98c9eb02" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "fbfa208e20addea9c97bcc57113a29ed" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "628e3a91f5849f2b93ca5b7baafeb71d" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "77befcd28d8be9daf98c8409fc92a4b2" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "7c1d001b944daab166ab929b9c89127e" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "19edc9d3c56012c46cf888017f75c6be" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "dfd7d399073d2c49956b3f754cb45d55" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "28a51c6fdea48364cf17c42190c713c8" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "5359ac242b78e81ec42e1dccc8364af3" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "066ce33ec7712127e83c8cc08abe22e6" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "584500e8510ce5a4029133fcce8ed606" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "51430f1a502f17580f864420ac50cdac" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "66685beb1ac1aa0d43b929b244eff09e" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "070ccf5cafde328b2144c57120fef036" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "4de978984e5ed6cdbbd289bf307c6f91" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "cbd891bc2a7f9f7de05b4f7e58c48688" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "178572c3991bc1b598117565bc2e346e" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "61c7fc0f94f28c3cea0a10000ed50d1b" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "9cddd7c09e8c5406ffc3799baf045494" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "35465cb1085f65cce7df473b0bb936b5" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "778ca0b901c323afe31ec10f367c89d9" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "168fbdebbb234b034826597d46c4003d" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "cedfcac91b88fd728960ed57f07aef1a" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "e758922164119458bc1c1efed670789b" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "6a7743d5dff83cd45c6cf7691ce25bce" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "10fe461e3e252ab5610c718242c04716" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "a6728c64b1194a757c4f3d8e151abdff" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "b466e0b2b38548b778bd38dd6c80849f" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 5120, 1384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28344320, "byteOffset": 0 } ], "md5sum": "acff86b07643c47947f90d1dcae7daac" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "119e8c4673f754dca19be72915de5567" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "cb631e97dc5826011e16ff1e7292f099" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 26370048, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 5120, 346 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3543040, "byteOffset": 11806720 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 15349760 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22427648 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22437888 } ], "md5sum": "367185e2389e54b2e4e8b16643332eef" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 27648, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "f355c67c918b2767a5ec9389e34e75ef" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 15360, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "e378260e39e1d7f44a4eab42f6fb2f45" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 33292288, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 27648, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7077888, "byteOffset": 11796480 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 15360, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 18874368 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 22806528 } ], "md5sum": "5bbfccf19889e3dc41d2712c33f209ff" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 1310720, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 0 } ], "md5sum": "b883ba2d68de141a4e53e7c2e6a9f674" } ] }