diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,8487 @@ +{ + "metadata": { + "ParamSize": 605, + "ParamBytes": 16773932032.0, + "BitsPerParam": 2.9740677095756953 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 106496000, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 32000, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 106496000, + "byteOffset": 0 + } + ], + "md5sum": "4a9683da8d160bc8e02a8d2302f3298e" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "f0c479c1acc0b1a553d391e6e0799ac4" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "243ea9c5b1a5195521bfa361f86d119c" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.58.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "38a375df68921c769b75f6fd12bd3ccd" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 31789056, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 32000, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3328000, + "byteOffset": 0 + }, + { + "name": "model.layers.58.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3328000 + }, + { + "name": "model.layers.58.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 3341312 + }, + { + "name": "model.layers.58.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 5204992 + }, + { + "name": "model.layers.58.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 8932352 + }, + { + "name": "model.layers.58.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 8945664 + }, + { + "name": "model.layers.58.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 31096832 + } + ], + "md5sum": "53e3e0f708b4b18e80cc38de0def01a3" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "a72678187d2adb5a1aebd8c829654203" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "e5a0371556c7e834540ebc6d1abb1630" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.59.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "3902e00421836bd309cc40c5492902c0" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 106496000, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 32000, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 106496000, + "byteOffset": 0 + } + ], + "md5sum": "174f3359ecc537e9b9c6316b2c299287" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 32627712, + "records": [ + { + "name": "model.layers.58.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 0 + }, + { + "name": "model.layers.59.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 2076672 + }, + { + "name": "model.layers.59.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 2089984 + }, + { + "name": "model.layers.59.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 3953664 + }, + { + "name": "model.layers.59.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 7681024 + }, + { + "name": "model.layers.59.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 7694336 + }, + { + "name": "model.layers.59.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 9771008 + }, + { + "name": "model.layers.59.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 31922176 + }, + { + "name": "model.norm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 32614400 + } + ], + "md5sum": "a7439a3613a325b17d823265044443ca" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "4a7e70bd7e6ef57586326b7a28000f26" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "7a29f716007855b3f2ca50d3807a0221" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "1712e52b67c59e4ffbb2ddd0d8c61390" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 33173504, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 32000, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3328000, + "byteOffset": 0 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3328000 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 3341312 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 5204992 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 8932352 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 8945664 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 11022336 + } + ], + "md5sum": "162ed9bbb674cb1328352715d0e9cea2" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "28bce4c6073b73001db6875ab980d90e" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "17cb3283d97ec1cc5010fef398f5b9fe" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "0b0d579637636e65369b46415a3ee493" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "bcae21a3bfec4e4df2a9d956faa99b26" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "4c2573cbed16dcd76e2628ebbebbb08e" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 33106944, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 0 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 692224 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 705536 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 2569216 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 6296576 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 6309888 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 8386560 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 30537728 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 31229952 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 31243264 + } + ], + "md5sum": "bb13d1fa015dba30357ac1bc710b8653" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "7fc181884f76a1bdc41ad69beeb50b26" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "b5732ab357c49eb4b2f21261559d381d" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "b83b1ef8697c235a2b4ff781c9bdf363" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 30537728, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3727360 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 3740672 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 5817344 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 27968512 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 28660736 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 28674048 + } + ], + "md5sum": "cd17b4a7347b07f12eabf13364eb070e" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "47ccebdf8bb2fc6ab148c0348794d267" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "f35704f303e5c9717f54f0dfb1f92bd2" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "b477f62f299d2d853fa670aaba95a7a2" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "6b5fd07dd22f3248884fc9eba0643b90" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 32614400, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3727360 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 3740672 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 5817344 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 27968512 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 28660736 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 30737408 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 30750720 + } + ], + "md5sum": "36c52113bdc98f7c3a7f4894a0c976d2" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "6e4388f73ab2dcb6b10876b690ab4c84" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "f774c3b4c1612a833a0ab16c7dca937a" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "399bf19f00504cb74dcca1676f2e6540" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 30537728, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3727360 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 3740672 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 5817344 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 27968512 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 28660736 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 28674048 + } + ], + "md5sum": "3d9f5e67a012d02348bbd4f8cc60938e" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "55f40b4bed5cb822b6a680b0ff66ddef" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "67bb6b02f7ec07cf3b829fa123f58bcf" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "c303be8876d42c626e15711f5bc7b468" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 30537728, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3727360 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 3740672 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 5817344 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 27968512 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 28660736 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 28674048 + } + ], + "md5sum": "4058737169648bff0bd2a33de53d701e" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "279540bbb76119e3a00208fcf853edd9" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "727c26c74afcd64df7e782d6fe20c6e1" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "b0f90c57e6abdc18de2fab304afd2bbb" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "9eb254610797d1cc040670cbc449295e" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 32614400, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3727360 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 3740672 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 5817344 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 27968512 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 28660736 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 30737408 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 30750720 + } + ], + "md5sum": "e82e0e98eb0be433477e83ba84a514a0" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "c162ad910b70dddd49017bed0401c0ae" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "3b0b18f66cfc9bcefba4286ec6a04cea" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "031336e22755708fdb7d12c28e214623" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 22151168, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 0 + } + ], + "md5sum": "4fb8ec28e8caf89d40a67b76ca6e6771" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "9535e501d56f76b10e28b6fc537ab022" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "3911608e312c741632a94efdba2ed670" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 22151168, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 0 + } + ], + "md5sum": "2644fa7d27e5af85a2cf23654a945786" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "8d5881a1bc24b7812dd5697103207be5" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "c05683ccb9a64e2fd68ef1bdaa9895ba" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "cb502275e1f21efde7c64f0fd074b9fd" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 22151168, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 0 + } + ], + "md5sum": "5c78011e396eb1003ad4694ed19ce12a" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "0bc778ecd74f293c452455f2ff4e204a" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "f19aa601ada4fb213e71bef57d841112" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "65d724ad91eb2f715da12b90d601bc6b" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 32441344, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3727360 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3740672 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 3753984 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 5617664 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 9345024 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 9358336 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 11435008 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 12127232 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 12140544 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 14004224 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 17731584 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 17744896 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 18437120 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 18450432 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 20314112 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 24041472 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 24054784 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 26131456 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 26823680 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 26836992 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 28700672 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 32428032 + } + ], + "md5sum": "58feaaddc4861d8b181d16bcd242aae7" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "421ff4a216b2612b746f1d088b1e6c6c" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "ed0b5f663bd4b8f15bccd61d2dc0bd89" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "26269371684b24929b2ef775c6715fe9" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 22151168, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 0 + } + ], + "md5sum": "c1c1b19717a838056d21ffd3d61fefcc" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "162d12f59f92c4acb7dbb2c10f85a18a" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 33306624, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 2076672 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 24227840 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 24920064 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 24933376 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 26797056 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 30524416 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 30537728 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 32614400 + } + ], + "md5sum": "95f1fd00987341efcaef2e9f44344a7c" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "b2e13d3e676b5d8e0f77a08fa5e6aab4" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "a0a6489910ad80ece1abfbebe429ca89" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "eab142b87842612dd7de3df96cf9363c" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "2e05e98a989bcca8f13be620cf5b20d9" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 32414720, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 3727360 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 5804032 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 27955200 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 28647424 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 28660736 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 30524416 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 30537728 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 30551040 + } + ], + "md5sum": "6babddb28e3547ffc67fcdfa5b30e413" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "10d0080cf4e102a67db14c8f0effb97f" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "9cb472c20693ff91bae4f733516ad52a" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "6e9cebaafec1c5bb6f43afb2ff5dd251" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 30537728, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3727360 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 3740672 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 5817344 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 27968512 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 28660736 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 28674048 + } + ], + "md5sum": "9b9846079d9f868b0b07ecf7e79b3841" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "d81d00dc67cfcc545e696a6ff92470a2" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "75ae1fea3b1afd71c444c63ac01e9f37" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "13fc9adeafb3a41219dc7c967a426f6b" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 30537728, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3727360 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 3740672 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 5817344 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 27968512 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 28660736 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 28674048 + } + ], + "md5sum": "7fd8725aadfc7031a6dbcf3e560f152b" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "b9e0f51d296bc5e4a0572dd3ad27f126" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "1ef13466b52c04de5dc63f390a02887e" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "10df957a112f21cdc46ccdcca33f7116" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 30537728, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3727360 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 3740672 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 5817344 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 27968512 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 28660736 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 28674048 + } + ], + "md5sum": "c080df1cc8c954e46fad703ef17d26cd" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "8ec6805e12381f5ea2905521886b7451" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "ab1d6d4fb5176bfadc23607e791920e9" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "f5e6b6b260d7cb74e7fdaa49c3b4b10f" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "7780955c9b94364ee2ac399c7257de7a" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 32614400, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3727360 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 3740672 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 5817344 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 27968512 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 28660736 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 30737408 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 30750720 + } + ], + "md5sum": "47c7a9aaecbe94411a63acba8e233fcc" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "c2817f041337b78f62906a9d4b2e8dd2" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "620cf995306ccce01eaec88d10d674d5" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "d9cb4f4cb296c90be35016329ceccff2" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 32201728, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3727360 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 3740672 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 25891840 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 26584064 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 26597376 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 28461056 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 32188416 + } + ], + "md5sum": "2c2b955455f73f9398d97bb5c4095d60" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "063809a985309cfcef57624f0c3e03f1" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "25061d4f80057b2f0bf97bc2d25880d5" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "8bff658554d4a81406ba2f09ab064483" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 22151168, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 0 + } + ], + "md5sum": "9597a1ed3573636c9734d411db1f6d94" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "8deeb5c0bf54d7aa87f4dce44fb7a505" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 33319936, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 2076672 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 24227840 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 24920064 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 24933376 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 26797056 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 30524416 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 30537728 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 32614400 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 33306624 + } + ], + "md5sum": "7b2e6d4f4e9bd8ee787787d53eef1bb3" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "247a95c150776bcecd5c675387dfcb92" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "596507d7c8baa7879689c4bb49228d2a" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "0608b3c414abc10bb0ff480e2bb01e09" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 30524416, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 0 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 1863680 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 5591040 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 5604352 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 7681024 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 29832192 + } + ], + "md5sum": "465ffd9e377c6f107879b7c5aa22734d" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "507716465029d1039bcb8587af2353e8" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "d55d4acf35649a36343df1f00b972ea5" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "255feb90948063bf932e0836aaaf1069" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "c04cd0efe3e40e8c1d5e7f4e6117350a" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 32414720, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 3727360 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 5804032 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 27955200 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 28647424 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 28660736 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 30524416 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 30537728 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 30551040 + } + ], + "md5sum": "e9d6259617e02bd7f604a00f9703f4cd" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "66cdbbe51d325472b82d355c2a43b660" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "088874ed2b9d35e812c91ea24402cd5a" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "1aef636fde114be1b373f1ac966c76ca" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 30537728, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3727360 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 3740672 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 5817344 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 27968512 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 28660736 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 28674048 + } + ], + "md5sum": "9bc9ab6fe808dc9e8363aeac4d6ab4f9" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "0b150b6d95ee0b5dc59aee398db052fc" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "0c12a4ffba4ac73f529950f184d27833" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "474492bbb3a1609e9e93b8e7091aac4c" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 30537728, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3727360 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 3740672 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 5817344 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 27968512 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 28660736 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 28674048 + } + ], + "md5sum": "75147c2e2b4a0156544a585dab050dec" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "cd330c1fce37bc837a19ae6988ec64c1" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "9cb522ac18bdc209d47ba7a161ea4fd9" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "4b772dd309bd4f65aead308e9960f077" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 30537728, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3727360 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 3740672 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 5817344 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 27968512 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 28660736 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 28674048 + } + ], + "md5sum": "2416b320217b81a2a1690ec155d2272d" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "e02350e3c569229ce6e9b5ce3920b1cd" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "0629e0a612c9ae112d254a908b219f0e" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "e716b82634b769acc66a3c0b5228bfdc" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "d001049376225c3099c5c96533ec42e4" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 32614400, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3727360 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 3740672 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 5817344 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 27968512 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 28660736 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 30737408 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 30750720 + } + ], + "md5sum": "85773b7cd8ca878d357ca016869c84cf" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "22bfb4218cf021cd31337fed3774cebb" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "132b820b5a329bc79d2d7fae704820a6" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.32.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "7b2fd856536524b21de463725431b9d8" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 32201728, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3727360 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 3740672 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 25891840 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 26584064 + }, + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 26597376 + }, + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 28461056 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 32188416 + } + ], + "md5sum": "0de6777418f4f55813ab7908f30bff37" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "83bdcef1d414a039a2ed91c29ccc1af1" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "dd7d3c8cb575a5e80cd38c631b8bbf87" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.33.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "99cdfea93bc94391fe1b8de0cf6c06fe" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 22151168, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 0 + } + ], + "md5sum": "fce502555be7338fae74d61d9b986fd3" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "ff5b8786de3e9a263322aedb27503a3a" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 33319936, + "records": [ + { + "name": "model.layers.32.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 0 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 2076672 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 24227840 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 24920064 + }, + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 24933376 + }, + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 26797056 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 30524416 + }, + { + "name": "model.layers.33.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 30537728 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 32614400 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 33306624 + } + ], + "md5sum": "cde34562f526e3561622a7aa90762e52" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "f03801c1a48f339c0c83ad77a9e2c0fa" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.34.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "38676999cb79092c42ee662dd25f072f" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "1da4854d8d8e86d091bd8293fec71810" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 30524416, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 0 + }, + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 1863680 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 5591040 + }, + { + "name": "model.layers.34.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 5604352 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 7681024 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 29832192 + } + ], + "md5sum": "051eaaa4510a97df0f3700874bd1b7f7" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.35.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "71bf57a9bafde8ad29178e419a34033c" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "f2e4b778d3d17ef1ee37eb294f368d1f" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "ecb6f4df4cb8f73ee2ad46bdb66d3583" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "8650ba9dfaad0ad367e561725d0c191c" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 32414720, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.35.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 3727360 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 5804032 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 27955200 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 28647424 + }, + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 28660736 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 30524416 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 30537728 + }, + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 30551040 + } + ], + "md5sum": "5335e32053e97ce20fceb9f750e489a5" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.36.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "956b3bff9144d4b88daf7f86651510ae" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "0c43757248fe4d90d18f013f6c0631dc" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "d1e97ab688f20a87de755723d96c56fe" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 30537728, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3727360 + }, + { + "name": "model.layers.36.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 3740672 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 5817344 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 27968512 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 28660736 + }, + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 28674048 + } + ], + "md5sum": "a11db8ef277b1a8abdd0dcb20fd2fea2" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.37.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "b97accf896a5337367723a5eec471fb3" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "52e5cd60ead3e3279734b3cc1c4cfb01" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "cf395e9122855d0203e54121b8a44edf" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 30537728, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3727360 + }, + { + "name": "model.layers.37.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 3740672 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 5817344 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 27968512 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 28660736 + }, + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 28674048 + } + ], + "md5sum": "ca00078fcc12c8dbe662448ceca57930" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.38.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "4eea54649e6cb8400907b3d14ddb6c09" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "9a8896da204916b977b1ad665740509a" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "658b39fac4fab685834a65cce989c2c5" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 30537728, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3727360 + }, + { + "name": "model.layers.38.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 3740672 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 5817344 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 27968512 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 28660736 + }, + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 28674048 + } + ], + "md5sum": "1e19113712e6a7028a3e1b7aa85357bf" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.39.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "181f6c924200ab75fcdeef3357515fd5" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.40.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "2cc2b31c8959484f26e68210c602ec07" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "fe7973aa67e517c0767db7d4d5cfa524" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "2ecac826d13afdf328fa8fa1b00396f7" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 32614400, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3727360 + }, + { + "name": "model.layers.39.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 3740672 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 5817344 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 27968512 + }, + { + "name": "model.layers.40.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 28660736 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 30737408 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 30750720 + } + ], + "md5sum": "7fc02c14e299b71ff4d33ccb8a87dd3d" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "17a9a19c05111a38eaef0e183b16843e" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "e78982b1bb33b8fce694953105533302" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "01d77128449e4935f0f7ffb5c69bbd85" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 32201728, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3727360 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 3740672 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 25891840 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 26584064 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 26597376 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 28461056 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 32188416 + } + ], + "md5sum": "962654c9dd2be8fd2ca7358448a3018d" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "61f4056e844598167ff1c57df5582c89" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "e155a4c42d7596e268a8696e145c96f4" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "b54323b271f9cc60c6d2f43e62666b00" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 22151168, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 0 + } + ], + "md5sum": "ef43cff6507eb8dfc55f82a5603be388" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "0c8a8577f8d48e26d1c135924d47ccf4" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 33319936, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 2076672 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 24227840 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 24920064 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 24933376 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 26797056 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 30524416 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 30537728 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 32614400 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 33306624 + } + ], + "md5sum": "19a548dd442fe441777a2b0f701b89f9" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "f53a59672192863bf588b25d8d5063ea" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "a844714484efa1ca7f03317a5540f4ca" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "ac6004ddf8c978fb3d48b7a849eb9808" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 22151168, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 0 + } + ], + "md5sum": "6855cc02f353ba3fe3ef9650a35cc628" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "64d1cac6c1eafe233fb50d6f6571c264" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 33306624, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 0 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 1863680 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 5591040 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 5604352 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 7681024 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 29832192 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 30524416 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 32601088 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 33293312 + } + ], + "md5sum": "8ca83ec3b6447f335758827a728cd43a" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "e11142a0fa836d2f0e1a0edc96ab5a11" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "d3f7e8431f13eb1cde66d116b0a670ac" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "e3334991188344af8d67b25a70c9f774" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 30324736, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 0 + }, + { + "name": "model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 1863680 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 5591040 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 5604352 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 27755520 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 28447744 + }, + { + "name": "model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 28461056 + } + ], + "md5sum": "f41fa6575e1095c66078f458713a4e4e" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.41.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "53163bc4185956826b7d27655c655bc7" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "15d7b91ffc7239df07b9f69539b631b0" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "5c638bd197022a411b04f1dd420e5c26" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 30537728, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3727360 + }, + { + "name": "model.layers.41.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 3740672 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 5817344 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 27968512 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 28660736 + }, + { + "name": "model.layers.42.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 28674048 + } + ], + "md5sum": "40a4fcffacd8ee04e9a5018f0d1292b4" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.42.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "66c0f95b849b1dbcefb76a6244058718" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "fca28ec20f87863cb3ddcee2a8bc45e6" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "0a1a5dfda53d87aa2236fcf08c761e85" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 30537728, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3727360 + }, + { + "name": "model.layers.42.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 3740672 + }, + { + "name": "model.layers.42.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 5817344 + }, + { + "name": "model.layers.42.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 27968512 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 28660736 + }, + { + "name": "model.layers.43.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 28674048 + } + ], + "md5sum": "257d5eaf0b692ce98c800bd21218a71e" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.43.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "0a1a96b9bd74201bdf9320b0e5cb1aa3" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "a3b59171edbe05b9df92cdc163c54152" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.44.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "3f6371ab8e22dc33114858a8c3c61edc" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 32388096, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3727360 + }, + { + "name": "model.layers.43.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 3740672 + }, + { + "name": "model.layers.43.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 5817344 + }, + { + "name": "model.layers.43.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 27968512 + }, + { + "name": "model.layers.44.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 28660736 + } + ], + "md5sum": "d843635d739dc97346a3b7dade2256f9" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "0034f89acc74434be2df58caabdb2a31" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "f526e337167bb54f353826b50428ef7d" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "2ba92eeb198264308cb7303ee55b0c06" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.45.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "d78ded755ffdb5c34a9f481447e4760f" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 32428032, + "records": [ + { + "name": "model.layers.44.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 0 + }, + { + "name": "model.layers.44.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 2076672 + }, + { + "name": "model.layers.44.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 24227840 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 24920064 + }, + { + "name": "model.layers.44.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 24933376 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 26797056 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 26810368 + }, + { + "name": "model.layers.45.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 26823680 + }, + { + "name": "model.layers.45.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 28687360 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 32414720 + } + ], + "md5sum": "f2edc07f39987b55705d21396ebd384f" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "d7a71d6f6cc75d0787002a27518b5624" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "bfd9395bd00b148e9a1ddb00169da8f3" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.46.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "9c9d88294407c9c9e0da39e5a1b0c1fa" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 22151168, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 0 + } + ], + "md5sum": "59509a91f76f71854ec53325fa3cfcd9" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "60651345b43bd9216d9e5e4455e24469" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 33319936, + "records": [ + { + "name": "model.layers.45.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 0 + }, + { + "name": "model.layers.45.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 2076672 + }, + { + "name": "model.layers.45.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 24227840 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 24920064 + }, + { + "name": "model.layers.46.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 24933376 + }, + { + "name": "model.layers.46.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 26797056 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 30524416 + }, + { + "name": "model.layers.46.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 30537728 + }, + { + "name": "model.layers.46.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 32614400 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 33306624 + } + ], + "md5sum": "9a6c4ed8349297812dea26f71ddaf51f" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "0fdae7dec6c9ee4c80b68a0b35686e5d" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.47.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "8511314b9cc4563ecc235d5ef19bb5e7" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "8d3572e1d00df2efa2788afc5bfacda7" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "f1043c114d0133e11134cf33e0e36ec3" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 32401408, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 0 + }, + { + "name": "model.layers.47.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 1863680 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 5591040 + }, + { + "name": "model.layers.47.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 5604352 + }, + { + "name": "model.layers.47.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 7681024 + }, + { + "name": "model.layers.47.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 29832192 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 30524416 + }, + { + "name": "model.layers.48.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 30537728 + } + ], + "md5sum": "eab8ef1161ddd53294ce10abccc4b461" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.48.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "f7a5d8163e365109dffd139618317419" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.49.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "9e0e98c25f86a2907e09ca368318f024" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "cda78c2b0594c5ad2c041a3d6918516b" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "503b165d7351767758f99b532c7315f8" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 32614400, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3727360 + }, + { + "name": "model.layers.48.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 3740672 + }, + { + "name": "model.layers.48.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 5817344 + }, + { + "name": "model.layers.48.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 27968512 + }, + { + "name": "model.layers.49.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 28660736 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 30737408 + }, + { + "name": "model.layers.49.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 30750720 + } + ], + "md5sum": "eda925713c40cdd600026b6093b2d111" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.50.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "80b0a6132b3ca1bfe33453e0146cea9c" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "3071e66e45a4c95e09ac39b1fcdc52d8" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.50.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "49e232508a4e1d576d888b51f23509d3" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 32201728, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3727360 + }, + { + "name": "model.layers.49.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 3740672 + }, + { + "name": "model.layers.49.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 25891840 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 26584064 + }, + { + "name": "model.layers.50.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 26597376 + }, + { + "name": "model.layers.50.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 28461056 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 32188416 + } + ], + "md5sum": "fbb373016151839ae24f59722255acb9" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "6d24dfa09ac126ba44500e46bc944514" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "c1e1c03881c8925e5724e1ae518ee925" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.51.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "652065ca862cda7c10007eb2f29a0f4b" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 22151168, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 0 + } + ], + "md5sum": "8288e825c0480e5e57070a35a144fc4b" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "5cddccd4c3970a8f5bab963b26bdaec4" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 33319936, + "records": [ + { + "name": "model.layers.50.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 0 + }, + { + "name": "model.layers.50.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 2076672 + }, + { + "name": "model.layers.50.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 24227840 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 24920064 + }, + { + "name": "model.layers.51.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 24933376 + }, + { + "name": "model.layers.51.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 26797056 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 30524416 + }, + { + "name": "model.layers.51.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 30537728 + }, + { + "name": "model.layers.51.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 32614400 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 33306624 + } + ], + "md5sum": "ab0a5cc0d8f6cc0198832682716f9136" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "3f14751a5c5a1c523163e5316b4ed137" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.52.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "abf241a662a54af888104c483a765b83" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "45cf99dc8a23729c7a0a35c889c7412e" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 30524416, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 0 + }, + { + "name": "model.layers.52.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 1863680 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 5591040 + }, + { + "name": "model.layers.52.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 5604352 + }, + { + "name": "model.layers.52.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 7681024 + }, + { + "name": "model.layers.52.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 29832192 + } + ], + "md5sum": "3bfe762aec5b2390b6a17e2a1ca38561" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.53.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "4ece56675664829594956feaec256ecf" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "b18605decf2fa5b98d0f91e836748b55" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "ba7e40878ff26d3f14e99e7ed732478c" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "fa576548e167d79c2bf3eb90ed3e1375" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 32414720, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.53.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 3727360 + }, + { + "name": "model.layers.53.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 5804032 + }, + { + "name": "model.layers.53.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 27955200 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 28647424 + }, + { + "name": "model.layers.53.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 28660736 + }, + { + "name": "model.layers.53.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 30524416 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 30537728 + }, + { + "name": "model.layers.54.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 30551040 + } + ], + "md5sum": "02cbf37c41b53d997a3dd68cb214d07e" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.54.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "c62bcb6ea07afbf6b20a5b5310ffd843" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "e71766bea8e4249567348f2c1a1172a3" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "ae1e27bf20b35b2d6e515b2b2b5670ce" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 30537728, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3727360 + }, + { + "name": "model.layers.54.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 3740672 + }, + { + "name": "model.layers.54.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 5817344 + }, + { + "name": "model.layers.54.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 27968512 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 28660736 + }, + { + "name": "model.layers.55.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 28674048 + } + ], + "md5sum": "e5fc86a6c0e1d3468a8e779b6a489015" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.55.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "ac2c8401f4e24484137d203fe6d1576d" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "0be37db7dae009248faf3969678b33ec" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "a6c5a1ec76ea802dfc884f0944653739" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 30537728, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3727360 + }, + { + "name": "model.layers.55.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 3740672 + }, + { + "name": "model.layers.55.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 5817344 + }, + { + "name": "model.layers.55.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 27968512 + }, + { + "name": "model.layers.56.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 28660736 + }, + { + "name": "model.layers.56.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 28674048 + } + ], + "md5sum": "1b5d3ee46a0c272cdb357400168a1c18" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.56.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "4a679280ca9d9dda5ea52bddad68cb71" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 59637760, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_weight", + "shape": [ + 6656, + 2240 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 59637760, + "byteOffset": 0 + } + ], + "md5sum": "14883770cbb18c842d988132727df6c0" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 119275520, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_weight", + "shape": [ + 35840, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 119275520, + "byteOffset": 0 + } + ], + "md5sum": "ba37ae951568fea6d0a792d9b340cd45" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 30537728, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.56.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3727360 + }, + { + "name": "model.layers.56.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 3740672 + }, + { + "name": "model.layers.56.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 5817344 + }, + { + "name": "model.layers.56.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 27968512 + }, + { + "name": "model.layers.57.input_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 28660736 + }, + { + "name": "model.layers.57.mlp.down_proj.q_scale", + "shape": [ + 6656, + 140 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1863680, + "byteOffset": 28674048 + } + ], + "md5sum": "86e11d6e2a150a5e21b4fdc1c4c4a7e7" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 66453504, + "records": [ + { + "name": "model.layers.57.self_attn.qkv_proj.q_weight", + "shape": [ + 19968, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 66453504, + "byteOffset": 0 + } + ], + "md5sum": "a5cfdb073d078be658a992d489e17c53" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 28660736, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_scale", + "shape": [ + 35840, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3727360, + "byteOffset": 0 + }, + { + "name": "model.layers.57.post_attention_layernorm.weight", + "shape": [ + 6656 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13312, + "byteOffset": 3727360 + }, + { + "name": "model.layers.57.self_attn.qkv_proj.q_scale", + "shape": [ + 19968, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2076672, + "byteOffset": 3740672 + }, + { + "name": "model.layers.57.self_attn.o_proj.q_weight", + "shape": [ + 6656, + 832 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22151168, + "byteOffset": 5817344 + }, + { + "name": "model.layers.57.self_attn.o_proj.q_scale", + "shape": [ + 6656, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 692224, + "byteOffset": 27968512 + } + ], + "md5sum": "ccdcd268c649d59da495f4d6ae8c8b2b" + } + ] +} \ No newline at end of file