{
  "embed_dim": 1024,
  "num_heads": 4,
  "dropout": 0.05,
  "bias": true,
  "use_layernorm": true,
  "use_MLP": true,
  "MLP_h_size": 2048,
  "MLP_output_size": 1024,
  "use_residual_MLP": "concat",
  "MLP_type": "swiglu_h+swiglu_d",
  "h2_size": 1024,
  "ignore_cls_as_kv": true,
  "expand_emb_dim_to": 0,
  "compress_output_dim_to": 0
}