{
  "_name_or_path": "/mnt/dolphinfs/ssd_pool/docker/user/hadoop-vacv/lidengjie/projects/fsvd-llm/cache/llama-7b-hf/models/stage1-bf16-usv",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 11008,
  "linear_info": {
    "model.layers.0.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 2246
    },
    "model.layers.0.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1209
    },
    "model.layers.0.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1234
    },
    "model.layers.0.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 117
    },
    "model.layers.0.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 634
    },
    "model.layers.0.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 106
    },
    "model.layers.0.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 1154
    },
    "model.layers.1.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 1997
    },
    "model.layers.1.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1514
    },
    "model.layers.1.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1507
    },
    "model.layers.1.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 262
    },
    "model.layers.1.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 1082
    },
    "model.layers.1.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 265
    },
    "model.layers.1.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 944
    },
    "model.layers.10.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 988
    },
    "model.layers.10.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 699
    },
    "model.layers.10.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1058
    },
    "model.layers.10.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 312
    },
    "model.layers.10.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 591
    },
    "model.layers.10.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 343
    },
    "model.layers.10.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 534
    },
    "model.layers.11.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 1526
    },
    "model.layers.11.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1007
    },
    "model.layers.11.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1341
    },
    "model.layers.11.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 273
    },
    "model.layers.11.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 918
    },
    "model.layers.11.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 309
    },
    "model.layers.11.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 944
    },
    "model.layers.12.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 1754
    },
    "model.layers.12.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1081
    },
    "model.layers.12.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1443
    },
    "model.layers.12.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 323
    },
    "model.layers.12.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 783
    },
    "model.layers.12.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 348
    },
    "model.layers.12.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 684
    },
    "model.layers.13.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 1846
    },
    "model.layers.13.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1181
    },
    "model.layers.13.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1469
    },
    "model.layers.13.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 230
    },
    "model.layers.13.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 312
    },
    "model.layers.13.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 275
    },
    "model.layers.13.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 433
    },
    "model.layers.14.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 1911
    },
    "model.layers.14.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1255
    },
    "model.layers.14.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1519
    },
    "model.layers.14.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 407
    },
    "model.layers.14.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 1035
    },
    "model.layers.14.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 419
    },
    "model.layers.14.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 977
    },
    "model.layers.15.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 2046
    },
    "model.layers.15.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1349
    },
    "model.layers.15.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1581
    },
    "model.layers.15.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 309
    },
    "model.layers.15.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 1098
    },
    "model.layers.15.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 375
    },
    "model.layers.15.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 1008
    },
    "model.layers.16.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 2255
    },
    "model.layers.16.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1462
    },
    "model.layers.16.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1695
    },
    "model.layers.16.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 301
    },
    "model.layers.16.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 912
    },
    "model.layers.16.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 346
    },
    "model.layers.16.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 886
    },
    "model.layers.17.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 2206
    },
    "model.layers.17.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1432
    },
    "model.layers.17.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1659
    },
    "model.layers.17.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 373
    },
    "model.layers.17.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 1215
    },
    "model.layers.17.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 451
    },
    "model.layers.17.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 1024
    },
    "model.layers.18.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 2097
    },
    "model.layers.18.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1430
    },
    "model.layers.18.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1618
    },
    "model.layers.18.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 399
    },
    "model.layers.18.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 1352
    },
    "model.layers.18.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 370
    },
    "model.layers.18.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 1088
    },
    "model.layers.19.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 2067
    },
    "model.layers.19.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1412
    },
    "model.layers.19.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1595
    },
    "model.layers.19.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 238
    },
    "model.layers.19.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 977
    },
    "model.layers.19.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 261
    },
    "model.layers.19.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 796
    },
    "model.layers.2.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 2084
    },
    "model.layers.2.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1397
    },
    "model.layers.2.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1554
    },
    "model.layers.2.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 375
    },
    "model.layers.2.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 1401
    },
    "model.layers.2.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 323
    },
    "model.layers.2.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 1099
    },
    "model.layers.20.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 2094
    },
    "model.layers.20.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1361
    },
    "model.layers.20.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1520
    },
    "model.layers.20.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 274
    },
    "model.layers.20.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 1012
    },
    "model.layers.20.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 302
    },
    "model.layers.20.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 890
    },
    "model.layers.21.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 2015
    },
    "model.layers.21.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1326
    },
    "model.layers.21.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1460
    },
    "model.layers.21.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 208
    },
    "model.layers.21.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 861
    },
    "model.layers.21.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 201
    },
    "model.layers.21.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 700
    },
    "model.layers.22.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 1805
    },
    "model.layers.22.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1266
    },
    "model.layers.22.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1370
    },
    "model.layers.22.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 146
    },
    "model.layers.22.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 282
    },
    "model.layers.22.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 146
    },
    "model.layers.22.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 326
    },
    "model.layers.23.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 1637
    },
    "model.layers.23.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1177
    },
    "model.layers.23.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1316
    },
    "model.layers.23.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 217
    },
    "model.layers.23.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 730
    },
    "model.layers.23.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 183
    },
    "model.layers.23.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 525
    },
    "model.layers.24.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 1479
    },
    "model.layers.24.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1085
    },
    "model.layers.24.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1202
    },
    "model.layers.24.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 124
    },
    "model.layers.24.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 588
    },
    "model.layers.24.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 127
    },
    "model.layers.24.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 421
    },
    "model.layers.25.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 1351
    },
    "model.layers.25.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1087
    },
    "model.layers.25.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1179
    },
    "model.layers.25.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 30
    },
    "model.layers.25.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 22
    },
    "model.layers.25.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 49
    },
    "model.layers.25.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 54
    },
    "model.layers.26.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 1253
    },
    "model.layers.26.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1008
    },
    "model.layers.26.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1098
    },
    "model.layers.26.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 15
    },
    "model.layers.26.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 3
    },
    "model.layers.26.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 15
    },
    "model.layers.26.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 23
    },
    "model.layers.27.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 1184
    },
    "model.layers.27.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1022
    },
    "model.layers.27.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1074
    },
    "model.layers.27.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 27
    },
    "model.layers.27.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 27
    },
    "model.layers.27.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 33
    },
    "model.layers.27.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 64
    },
    "model.layers.28.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 1327
    },
    "model.layers.28.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1184
    },
    "model.layers.28.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1207
    },
    "model.layers.28.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 108
    },
    "model.layers.28.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 353
    },
    "model.layers.28.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 130
    },
    "model.layers.28.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 383
    },
    "model.layers.29.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 1463
    },
    "model.layers.29.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1307
    },
    "model.layers.29.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1320
    },
    "model.layers.29.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 17
    },
    "model.layers.29.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 8
    },
    "model.layers.29.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 25
    },
    "model.layers.29.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 18
    },
    "model.layers.3.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 1787
    },
    "model.layers.3.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1554
    },
    "model.layers.3.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1730
    },
    "model.layers.3.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 356
    },
    "model.layers.3.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 885
    },
    "model.layers.3.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 333
    },
    "model.layers.3.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 867
    },
    "model.layers.30.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 1715
    },
    "model.layers.30.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1444
    },
    "model.layers.30.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1493
    },
    "model.layers.30.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 27
    },
    "model.layers.30.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 26
    },
    "model.layers.30.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 33
    },
    "model.layers.30.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 40
    },
    "model.layers.31.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 2443
    },
    "model.layers.31.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1731
    },
    "model.layers.31.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1894
    },
    "model.layers.31.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 88
    },
    "model.layers.31.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 354
    },
    "model.layers.31.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 82
    },
    "model.layers.31.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 396
    },
    "model.layers.4.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 1592
    },
    "model.layers.4.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1301
    },
    "model.layers.4.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1552
    },
    "model.layers.4.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 434
    },
    "model.layers.4.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 1259
    },
    "model.layers.4.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 401
    },
    "model.layers.4.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 976
    },
    "model.layers.5.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 1636
    },
    "model.layers.5.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1215
    },
    "model.layers.5.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1653
    },
    "model.layers.5.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 377
    },
    "model.layers.5.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 798
    },
    "model.layers.5.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 354
    },
    "model.layers.5.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 776
    },
    "model.layers.6.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 1452
    },
    "model.layers.6.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1112
    },
    "model.layers.6.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1504
    },
    "model.layers.6.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 397
    },
    "model.layers.6.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 985
    },
    "model.layers.6.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 419
    },
    "model.layers.6.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 755
    },
    "model.layers.7.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 1211
    },
    "model.layers.7.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 834
    },
    "model.layers.7.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1257
    },
    "model.layers.7.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 332
    },
    "model.layers.7.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 930
    },
    "model.layers.7.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 342
    },
    "model.layers.7.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 794
    },
    "model.layers.8.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 1092
    },
    "model.layers.8.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 723
    },
    "model.layers.8.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1213
    },
    "model.layers.8.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 376
    },
    "model.layers.8.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 832
    },
    "model.layers.8.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 386
    },
    "model.layers.8.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 749
    },
    "model.layers.9.mlp.down_proj": {
      "bias": true,
      "in_features": 11008,
      "out_features": 4096,
      "rank": 960
    },
    "model.layers.9.mlp.gate_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 708
    },
    "model.layers.9.mlp.up_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 11008,
      "rank": 1071
    },
    "model.layers.9.self_attn.k_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 347
    },
    "model.layers.9.self_attn.o_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 683
    },
    "model.layers.9.self_attn.q_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 357
    },
    "model.layers.9.self_attn.v_proj": {
      "bias": true,
      "in_features": 4096,
      "out_features": 4096,
      "rank": 530
    }
  },
  "max_position_embeddings": 2048,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "pad_token_id": 0,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.43.2",
  "use_cache": true,
  "vocab_size": 32000
}