mamba2attn-2.7b / config.json
km
Model release
5e0f47f
raw
history blame contribute delete
599 Bytes
{
"d_model": 2560,
"d_intermediate": 0,
"n_layer": 64,
"vocab_size": 50277,
"ssm_cfg": {
"layer": "Mamba2"
},
"attn_layer_idx": [
9,
18,
27,
36,
45,
56
],
"attn_cfg": {
"causal": true,
"d_conv": 4,
"head_dim": 128,
"num_heads": 30,
"out_proj_bias": false,
"qkv_proj_bias": false,
"rotary_emb_dim": 64
},
"rms_norm": true,
"residual_in_fp32": true,
"fused_add_norm": true,
"pad_vocab_size_multiple": 16,
"tie_embeddings": true
}