Motif-2.6B / config.json
{
"absolute_position_embedding": false,
"architectures": [
"MotifForCausalLM"
],
"attention_dropout": 0.0,
"auto_map": {
"AutoConfig": "configuration_motif.MotifConfig",
"AutoModelForCausalLM": "modeling_motif.MotifForCausalLM"
},
"bfloat16": true,
"bos_token_id": 219396,
"continual_training": false,
"decoder_split_layers": [],
"decontam_attn": false,
"dim_model_base": 2048,
"dim_model_base_attn": 128,
"dim_model_base_init": 2048,
"dim_model_base_lmh": 1,
"dim_model_base_logits": 2048,
"dim_model_base_lr": 256,
"down_proj_alpha": 0.15625,
"embed_tokens_alpha": null,
"encoder_split_layers": [],
"eos_token_id": 219395,
"first_expansion": false,
"fused_rope": true,
"gate_up_proj_alpha": 0.15625,
"hidden_act": "poly_norm",
"hidden_act_moe": null,
"hidden_size": 2048,
"hidden_states_shrink": 0.17677669529663687,
"init_scale_o": 1,
"initializer_range": 2e-05,
"input_layernorm_alpha": null,
"intermediate_size": 8192,
"k_proj_alpha": 0.15625,
"lm_head_alpha": null,
"loss_reduction": "mean",
"max_position_embeddings": 16384,
"max_window_layers": 28,
"mix_attn": false,
"model_type": "Motif",
"moe": false,
"moe_intermediate_size": null,
"moe_layer": false,
"muP": false,
"multi_token_heads": null,
"n_group": null,
"n_routed_experts": null,
"norm_alpha": null,
"norm_topk_prob": null,
"num_attention_heads": 16,
"num_hidden_layers": 32,
"num_key_value_heads": 16,
"num_stages": false,
"o_proj_alpha": 0.15625,
"post_attention_layernorm_alpha": null,
"q_proj_alpha": 0.15625,
"rms_norm_eps": 1e-06,
"rope_scaling": null,
"rope_theta": 500000.0,
"routed_scaling_factor": null,
"scale_emb": 1,
"scoring_func": null,
"seq_aux": null,
"sliding_window": null,
"tensor_parallel": true,
"tie_word_embeddings": true,
"topk_group": null,
"topk_method": null,
"torch_dtype": "float32",
"transformers_version": "4.51.3",
"use_advanced_parallelization": true,
"use_bias": false,
"use_cache": false,
"use_emb_alpha": false,
"use_fused_mlp": null,
"use_moreh_attention": true,
"use_moreh_moe": false,
"use_mrope": false,
"use_norm_alpha": false,
"use_pipeline": false,
"use_qk_norm": false,
"use_sliding_window": false,
"v_proj_alpha": 0.15625,
"vocab_size": 219520,
"wesar_weights": false
}
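
The `auto_map` entries point to custom `configuration_motif.MotifConfig` and `modeling_motif.MotifForCausalLM` classes shipped with the repo, so loading this checkpoint through `transformers` requires `trust_remote_code=True`. Below is a minimal loading sketch under stated assumptions: the `repo_id` is a placeholder for the actual Hugging Face repo id or a local directory containing this config, and loading in `bfloat16` is a choice (the file sets `"bfloat16": true` but `"torch_dtype": "float32"`), not something the config prescribes.

```python
import torch
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

# Assumption: placeholder repo id; replace with the real HF repo or a local path.
repo_id = "Motif-2.6B"

# trust_remote_code=True is needed because auto_map resolves to the custom
# configuration_motif / modeling_motif classes rather than built-in ones.
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
print(config.model_type, config.hidden_size, config.num_hidden_layers)

model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    torch_dtype=torch.bfloat16,  # assumption: bf16 load; the file's torch_dtype is float32
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)

inputs = tokenizer("Hello", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```

For a quick sanity check against the config: `hidden_size` 2048 split over `num_attention_heads` 16 gives a 128-dimensional head (matching `dim_model_base_attn`), and `num_key_value_heads` equals the number of attention heads, i.e. standard multi-head attention rather than grouped-query attention.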