llama-7b-hf-c0.6 / config.json
李登杰
init
fdf1957
{
"_name_or_path": "/mnt/dolphinfs/ssd_pool/docker/user/hadoop-vacv/lidengjie/projects/fsvd-llm/cache/llama-7b-hf/models/stage1-bf16-usv",
"architectures": [
"LlamaForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 1,
"eos_token_id": 2,
"hidden_act": "silu",
"hidden_size": 4096,
"initializer_range": 0.02,
"intermediate_size": 11008,
"linear_info": {
"model.layers.0.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 2246
},
"model.layers.0.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1209
},
"model.layers.0.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1234
},
"model.layers.0.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 117
},
"model.layers.0.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 634
},
"model.layers.0.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 106
},
"model.layers.0.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 1154
},
"model.layers.1.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 1997
},
"model.layers.1.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1514
},
"model.layers.1.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1507
},
"model.layers.1.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 262
},
"model.layers.1.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 1082
},
"model.layers.1.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 265
},
"model.layers.1.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 944
},
"model.layers.10.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 988
},
"model.layers.10.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 699
},
"model.layers.10.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1058
},
"model.layers.10.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 312
},
"model.layers.10.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 591
},
"model.layers.10.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 343
},
"model.layers.10.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 534
},
"model.layers.11.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 1526
},
"model.layers.11.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1007
},
"model.layers.11.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1341
},
"model.layers.11.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 273
},
"model.layers.11.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 918
},
"model.layers.11.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 309
},
"model.layers.11.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 944
},
"model.layers.12.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 1754
},
"model.layers.12.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1081
},
"model.layers.12.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1443
},
"model.layers.12.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 323
},
"model.layers.12.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 783
},
"model.layers.12.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 348
},
"model.layers.12.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 684
},
"model.layers.13.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 1846
},
"model.layers.13.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1181
},
"model.layers.13.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1469
},
"model.layers.13.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 230
},
"model.layers.13.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 312
},
"model.layers.13.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 275
},
"model.layers.13.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 433
},
"model.layers.14.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 1911
},
"model.layers.14.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1255
},
"model.layers.14.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1519
},
"model.layers.14.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 407
},
"model.layers.14.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 1035
},
"model.layers.14.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 419
},
"model.layers.14.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 977
},
"model.layers.15.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 2046
},
"model.layers.15.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1349
},
"model.layers.15.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1581
},
"model.layers.15.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 309
},
"model.layers.15.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 1098
},
"model.layers.15.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 375
},
"model.layers.15.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 1008
},
"model.layers.16.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 2255
},
"model.layers.16.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1462
},
"model.layers.16.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1695
},
"model.layers.16.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 301
},
"model.layers.16.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 912
},
"model.layers.16.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 346
},
"model.layers.16.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 886
},
"model.layers.17.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 2206
},
"model.layers.17.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1432
},
"model.layers.17.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1659
},
"model.layers.17.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 373
},
"model.layers.17.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 1215
},
"model.layers.17.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 451
},
"model.layers.17.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 1024
},
"model.layers.18.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 2097
},
"model.layers.18.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1430
},
"model.layers.18.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1618
},
"model.layers.18.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 399
},
"model.layers.18.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 1352
},
"model.layers.18.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 370
},
"model.layers.18.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 1088
},
"model.layers.19.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 2067
},
"model.layers.19.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1412
},
"model.layers.19.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1595
},
"model.layers.19.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 238
},
"model.layers.19.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 977
},
"model.layers.19.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 261
},
"model.layers.19.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 796
},
"model.layers.2.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 2084
},
"model.layers.2.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1397
},
"model.layers.2.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1554
},
"model.layers.2.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 375
},
"model.layers.2.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 1401
},
"model.layers.2.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 323
},
"model.layers.2.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 1099
},
"model.layers.20.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 2094
},
"model.layers.20.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1361
},
"model.layers.20.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1520
},
"model.layers.20.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 274
},
"model.layers.20.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 1012
},
"model.layers.20.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 302
},
"model.layers.20.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 890
},
"model.layers.21.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 2015
},
"model.layers.21.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1326
},
"model.layers.21.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1460
},
"model.layers.21.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 208
},
"model.layers.21.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 861
},
"model.layers.21.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 201
},
"model.layers.21.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 700
},
"model.layers.22.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 1805
},
"model.layers.22.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1266
},
"model.layers.22.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1370
},
"model.layers.22.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 146
},
"model.layers.22.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 282
},
"model.layers.22.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 146
},
"model.layers.22.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 326
},
"model.layers.23.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 1637
},
"model.layers.23.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1177
},
"model.layers.23.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1316
},
"model.layers.23.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 217
},
"model.layers.23.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 730
},
"model.layers.23.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 183
},
"model.layers.23.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 525
},
"model.layers.24.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 1479
},
"model.layers.24.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1085
},
"model.layers.24.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1202
},
"model.layers.24.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 124
},
"model.layers.24.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 588
},
"model.layers.24.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 127
},
"model.layers.24.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 421
},
"model.layers.25.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 1351
},
"model.layers.25.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1087
},
"model.layers.25.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1179
},
"model.layers.25.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 30
},
"model.layers.25.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 22
},
"model.layers.25.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 49
},
"model.layers.25.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 54
},
"model.layers.26.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 1253
},
"model.layers.26.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1008
},
"model.layers.26.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1098
},
"model.layers.26.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 15
},
"model.layers.26.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 3
},
"model.layers.26.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 15
},
"model.layers.26.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 23
},
"model.layers.27.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 1184
},
"model.layers.27.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1022
},
"model.layers.27.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1074
},
"model.layers.27.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 27
},
"model.layers.27.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 27
},
"model.layers.27.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 33
},
"model.layers.27.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 64
},
"model.layers.28.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 1327
},
"model.layers.28.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1184
},
"model.layers.28.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1207
},
"model.layers.28.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 108
},
"model.layers.28.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 353
},
"model.layers.28.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 130
},
"model.layers.28.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 383
},
"model.layers.29.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 1463
},
"model.layers.29.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1307
},
"model.layers.29.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1320
},
"model.layers.29.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 17
},
"model.layers.29.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 8
},
"model.layers.29.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 25
},
"model.layers.29.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 18
},
"model.layers.3.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 1787
},
"model.layers.3.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1554
},
"model.layers.3.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1730
},
"model.layers.3.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 356
},
"model.layers.3.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 885
},
"model.layers.3.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 333
},
"model.layers.3.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 867
},
"model.layers.30.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 1715
},
"model.layers.30.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1444
},
"model.layers.30.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1493
},
"model.layers.30.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 27
},
"model.layers.30.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 26
},
"model.layers.30.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 33
},
"model.layers.30.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 40
},
"model.layers.31.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 2443
},
"model.layers.31.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1731
},
"model.layers.31.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1894
},
"model.layers.31.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 88
},
"model.layers.31.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 354
},
"model.layers.31.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 82
},
"model.layers.31.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 396
},
"model.layers.4.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 1592
},
"model.layers.4.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1301
},
"model.layers.4.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1552
},
"model.layers.4.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 434
},
"model.layers.4.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 1259
},
"model.layers.4.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 401
},
"model.layers.4.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 976
},
"model.layers.5.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 1636
},
"model.layers.5.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1215
},
"model.layers.5.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1653
},
"model.layers.5.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 377
},
"model.layers.5.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 798
},
"model.layers.5.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 354
},
"model.layers.5.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 776
},
"model.layers.6.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 1452
},
"model.layers.6.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1112
},
"model.layers.6.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1504
},
"model.layers.6.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 397
},
"model.layers.6.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 985
},
"model.layers.6.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 419
},
"model.layers.6.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 755
},
"model.layers.7.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 1211
},
"model.layers.7.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 834
},
"model.layers.7.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1257
},
"model.layers.7.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 332
},
"model.layers.7.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 930
},
"model.layers.7.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 342
},
"model.layers.7.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 794
},
"model.layers.8.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 1092
},
"model.layers.8.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 723
},
"model.layers.8.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1213
},
"model.layers.8.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 376
},
"model.layers.8.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 832
},
"model.layers.8.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 386
},
"model.layers.8.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 749
},
"model.layers.9.mlp.down_proj": {
"bias": true,
"in_features": 11008,
"out_features": 4096,
"rank": 960
},
"model.layers.9.mlp.gate_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 708
},
"model.layers.9.mlp.up_proj": {
"bias": true,
"in_features": 4096,
"out_features": 11008,
"rank": 1071
},
"model.layers.9.self_attn.k_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 347
},
"model.layers.9.self_attn.o_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 683
},
"model.layers.9.self_attn.q_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 357
},
"model.layers.9.self_attn.v_proj": {
"bias": true,
"in_features": 4096,
"out_features": 4096,
"rank": 530
}
},
"max_position_embeddings": 2048,
"mlp_bias": false,
"model_type": "llama",
"num_attention_heads": 32,
"num_hidden_layers": 32,
"num_key_value_heads": 32,
"pad_token_id": 0,
"pretraining_tp": 1,
"rms_norm_eps": 1e-06,
"rope_scaling": null,
"rope_theta": 10000.0,
"tie_word_embeddings": false,
"torch_dtype": "bfloat16",
"transformers_version": "4.43.2",
"use_cache": true,
"vocab_size": 32000
}