kyutai
/

ARC8_Encoder_Mistral

model_hub_mixin

pytorch_model_hub_mixin

Model card Files Files and versions

ARC8_Encoder_Mistral / config.json

HippolyteP's picture

Push model using huggingface_hub.

0ce0ddb 18 days ago

history blame contribute delete

740 Bytes

	{
	"bridge_args": {
	"bridge_type": "mlp",
	"hidden_dim": 2048,
	"in_dim": 3072,
	"out_dim": 4096
	},
	"embedder": null,
	"embedder_args": {
	"causal_embedder": false,
	"compress_rates": [
	-8
	],
	"cont_tok": true,
	"n_truncated_layers": 2,
	"pooling_module": {
	"pool_type": "mean_pooled_queries",
	"where": "before"
	},
	"rec_tok": true,
	"train_embedding_mtx": true,
	"trained_layers": 27
	},
	"empty_init": 1,
	"llms": [],
	"model_args": {
	"dim": 3072,
	"head_dim": 128,
	"hidden_dim": 8192,
	"max_batch_size": 1,
	"n_heads": 24,
	"n_kv_heads": 8,
	"n_layers": 28,
	"norm_eps": 1e-05,
	"rope_theta": 500000.0,
	"vocab_size": 128256
	}
	}