ARC8_Encoder_Mistral / config.json
HippolyteP's picture
Push model using huggingface_hub.
0ce0ddb
raw
history blame contribute delete
740 Bytes
{
"bridge_args": {
"bridge_type": "mlp",
"hidden_dim": 2048,
"in_dim": 3072,
"out_dim": 4096
},
"embedder": null,
"embedder_args": {
"causal_embedder": false,
"compress_rates": [
-8
],
"cont_tok": true,
"n_truncated_layers": 2,
"pooling_module": {
"pool_type": "mean_pooled_queries",
"where": "before"
},
"rec_tok": true,
"train_embedding_mtx": true,
"trained_layers": 27
},
"empty_init": 1,
"llms": [],
"model_args": {
"dim": 3072,
"head_dim": 128,
"hidden_dim": 8192,
"max_batch_size": 1,
"n_heads": 24,
"n_kv_heads": 8,
"n_layers": 28,
"norm_eps": 1e-05,
"rope_theta": 500000.0,
"vocab_size": 128256
}
}