{
  "bridge_args": {
    "bridge_type": "mlp",
    "hidden_dim": 2048,
    "in_dim": 3072,
    "out_dim": 4096
  },
  "embedder": null,
  "embedder_args": {
    "causal_embedder": false,
    "compress_rates": [
      -8
    ],
    "cont_tok": true,
    "n_truncated_layers": 2,
    "pooling_module": {
      "pool_type": "mean_pooled_queries",
      "where": "before"
    },
    "rec_tok": true,
    "train_embedding_mtx": true,
    "trained_layers": 27
  },
  "empty_init": 1,
  "llms": [],
  "model_args": {
    "dim": 3072,
    "head_dim": 128,
    "hidden_dim": 8192,
    "max_batch_size": 1,
    "n_heads": 24,
    "n_kv_heads": 8,
    "n_layers": 28,
    "norm_eps": 1e-05,
    "rope_theta": 500000.0,
    "vocab_size": 128256
  }
}