{ "_name_or_path": "/mnt/dolphinfs/ssd_pool/docker/user/hadoop-vacv/lidengjie/projects/fsvd-llm/cache/llama-7b-hf/models/stage1-bf16-usv", "architectures": [ "LlamaForCausalLM" ], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "linear_info": { "model.layers.0.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2246 }, "model.layers.0.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1209 }, "model.layers.0.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1234 }, "model.layers.0.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 117 }, "model.layers.0.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 634 }, "model.layers.0.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 106 }, "model.layers.0.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1154 }, "model.layers.1.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1997 }, "model.layers.1.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1514 }, "model.layers.1.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1507 }, "model.layers.1.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 262 }, "model.layers.1.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1082 }, "model.layers.1.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 265 }, "model.layers.1.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 944 }, "model.layers.10.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 988 }, "model.layers.10.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 699 }, "model.layers.10.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1058 }, "model.layers.10.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 312 }, "model.layers.10.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 591 }, "model.layers.10.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 343 }, "model.layers.10.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 534 }, "model.layers.11.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1526 }, "model.layers.11.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1007 }, "model.layers.11.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1341 }, "model.layers.11.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 273 }, "model.layers.11.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 918 }, "model.layers.11.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 309 }, "model.layers.11.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 944 }, "model.layers.12.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1754 }, "model.layers.12.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1081 }, "model.layers.12.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1443 }, "model.layers.12.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 323 }, "model.layers.12.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 783 }, "model.layers.12.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 348 }, "model.layers.12.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 684 }, "model.layers.13.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1846 }, "model.layers.13.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1181 }, "model.layers.13.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1469 }, "model.layers.13.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 230 }, "model.layers.13.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 312 }, "model.layers.13.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 275 }, "model.layers.13.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 433 }, "model.layers.14.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1911 }, "model.layers.14.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1255 }, "model.layers.14.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1519 }, "model.layers.14.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 407 }, "model.layers.14.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1035 }, "model.layers.14.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 419 }, "model.layers.14.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 977 }, "model.layers.15.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2046 }, "model.layers.15.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1349 }, "model.layers.15.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1581 }, "model.layers.15.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 309 }, "model.layers.15.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1098 }, "model.layers.15.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 375 }, "model.layers.15.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1008 }, "model.layers.16.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2255 }, "model.layers.16.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1462 }, "model.layers.16.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1695 }, "model.layers.16.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 301 }, "model.layers.16.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 912 }, "model.layers.16.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 346 }, "model.layers.16.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 886 }, "model.layers.17.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2206 }, "model.layers.17.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1432 }, "model.layers.17.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1659 }, "model.layers.17.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 373 }, "model.layers.17.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1215 }, "model.layers.17.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 451 }, "model.layers.17.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1024 }, "model.layers.18.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2097 }, "model.layers.18.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1430 }, "model.layers.18.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1618 }, "model.layers.18.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 399 }, "model.layers.18.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1352 }, "model.layers.18.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 370 }, "model.layers.18.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1088 }, "model.layers.19.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2067 }, "model.layers.19.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1412 }, "model.layers.19.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1595 }, "model.layers.19.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 238 }, "model.layers.19.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 977 }, "model.layers.19.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 261 }, "model.layers.19.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 796 }, "model.layers.2.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2084 }, "model.layers.2.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1397 }, "model.layers.2.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1554 }, "model.layers.2.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 375 }, "model.layers.2.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1401 }, "model.layers.2.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 323 }, "model.layers.2.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1099 }, "model.layers.20.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2094 }, "model.layers.20.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1361 }, "model.layers.20.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1520 }, "model.layers.20.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 274 }, "model.layers.20.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1012 }, "model.layers.20.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 302 }, "model.layers.20.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 890 }, "model.layers.21.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2015 }, "model.layers.21.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1326 }, "model.layers.21.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1460 }, "model.layers.21.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 208 }, "model.layers.21.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 861 }, "model.layers.21.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 201 }, "model.layers.21.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 700 }, "model.layers.22.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1805 }, "model.layers.22.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1266 }, "model.layers.22.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1370 }, "model.layers.22.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 146 }, "model.layers.22.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 282 }, "model.layers.22.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 146 }, "model.layers.22.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 326 }, "model.layers.23.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1637 }, "model.layers.23.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1177 }, "model.layers.23.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1316 }, "model.layers.23.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 217 }, "model.layers.23.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 730 }, "model.layers.23.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 183 }, "model.layers.23.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 525 }, "model.layers.24.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1479 }, "model.layers.24.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1085 }, "model.layers.24.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1202 }, "model.layers.24.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 124 }, "model.layers.24.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 588 }, "model.layers.24.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 127 }, "model.layers.24.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 421 }, "model.layers.25.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1351 }, "model.layers.25.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1087 }, "model.layers.25.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1179 }, "model.layers.25.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 30 }, "model.layers.25.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 22 }, "model.layers.25.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 49 }, "model.layers.25.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 54 }, "model.layers.26.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1253 }, "model.layers.26.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1008 }, "model.layers.26.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1098 }, "model.layers.26.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 15 }, "model.layers.26.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 3 }, "model.layers.26.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 15 }, "model.layers.26.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 23 }, "model.layers.27.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1184 }, "model.layers.27.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1022 }, "model.layers.27.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1074 }, "model.layers.27.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 27 }, "model.layers.27.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 27 }, "model.layers.27.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 33 }, "model.layers.27.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 64 }, "model.layers.28.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1327 }, "model.layers.28.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1184 }, "model.layers.28.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1207 }, "model.layers.28.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 108 }, "model.layers.28.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 353 }, "model.layers.28.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 130 }, "model.layers.28.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 383 }, "model.layers.29.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1463 }, "model.layers.29.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1307 }, "model.layers.29.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1320 }, "model.layers.29.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 17 }, "model.layers.29.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 8 }, "model.layers.29.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 25 }, "model.layers.29.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 18 }, "model.layers.3.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1787 }, "model.layers.3.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1554 }, "model.layers.3.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1730 }, "model.layers.3.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 356 }, "model.layers.3.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 885 }, "model.layers.3.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 333 }, "model.layers.3.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 867 }, "model.layers.30.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1715 }, "model.layers.30.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1444 }, "model.layers.30.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1493 }, "model.layers.30.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 27 }, "model.layers.30.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 26 }, "model.layers.30.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 33 }, "model.layers.30.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 40 }, "model.layers.31.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2443 }, "model.layers.31.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1731 }, "model.layers.31.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1894 }, "model.layers.31.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 88 }, "model.layers.31.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 354 }, "model.layers.31.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 82 }, "model.layers.31.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 396 }, "model.layers.4.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1592 }, "model.layers.4.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1301 }, "model.layers.4.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1552 }, "model.layers.4.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 434 }, "model.layers.4.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1259 }, "model.layers.4.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 401 }, "model.layers.4.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 976 }, "model.layers.5.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1636 }, "model.layers.5.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1215 }, "model.layers.5.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1653 }, "model.layers.5.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 377 }, "model.layers.5.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 798 }, "model.layers.5.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 354 }, "model.layers.5.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 776 }, "model.layers.6.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1452 }, "model.layers.6.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1112 }, "model.layers.6.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1504 }, "model.layers.6.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 397 }, "model.layers.6.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 985 }, "model.layers.6.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 419 }, "model.layers.6.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 755 }, "model.layers.7.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1211 }, "model.layers.7.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 834 }, "model.layers.7.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1257 }, "model.layers.7.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 332 }, "model.layers.7.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 930 }, "model.layers.7.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 342 }, "model.layers.7.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 794 }, "model.layers.8.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1092 }, "model.layers.8.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 723 }, "model.layers.8.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1213 }, "model.layers.8.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 376 }, "model.layers.8.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 832 }, "model.layers.8.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 386 }, "model.layers.8.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 749 }, "model.layers.9.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 960 }, "model.layers.9.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 708 }, "model.layers.9.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1071 }, "model.layers.9.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 347 }, "model.layers.9.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 683 }, "model.layers.9.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 357 }, "model.layers.9.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 530 } }, "max_position_embeddings": 2048, "mlp_bias": false, "model_type": "llama", "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pad_token_id": 0, "pretraining_tp": 1, "rms_norm_eps": 1e-06, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "transformers_version": "4.43.2", "use_cache": true, "vocab_size": 32000 }