{ "absolute_position_embedding": false, "architectures": [ "MotifForCausalLM" ], "attention_dropout": 0.0, "auto_map": { "AutoConfig": "configuration_motif.MotifConfig", "AutoModelForCausalLM": "modeling_motif.MotifForCausalLM" }, "bfloat16": true, "bos_token_id": 219396, "continual_training": false, "decoder_split_layers": [], "decontam_attn": false, "dim_model_base": 2048, "dim_model_base_attn": 128, "dim_model_base_init": 2048, "dim_model_base_lmh": 1, "dim_model_base_logits": 2048, "dim_model_base_lr": 256, "down_proj_alpha": 0.15625, "embed_tokens_alpha": null, "encoder_split_layers": [], "eos_token_id": 219395, "first_expansion": false, "fused_rope": true, "gate_up_proj_alpha": 0.15625, "hidden_act": "poly_norm", "hidden_act_moe": null, "hidden_size": 2048, "hidden_states_shrink": 0.17677669529663687, "init_scale_o": 1, "initializer_range": 2e-05, "input_layernorm_alpha": null, "intermediate_size": 8192, "k_proj_alpha": 0.15625, "lm_head_alpha": null, "loss_reduction": "mean", "max_position_embeddings": 16384, "max_window_layers": 28, "mix_attn": false, "model_type": "Motif", "moe": false, "moe_intermediate_size": null, "moe_layer": false, "muP": false, "multi_token_heads": null, "n_group": null, "n_routed_experts": null, "norm_alpha": null, "norm_topk_prob": null, "num_attention_heads": 16, "num_hidden_layers": 32, "num_key_value_heads": 16, "num_stages": false, "o_proj_alpha": 0.15625, "post_attention_layernorm_alpha": null, "q_proj_alpha": 0.15625, "rms_norm_eps": 1e-06, "rope_scaling": null, "rope_theta": 500000.0, "routed_scaling_factor": null, "scale_emb": 1, "scoring_func": null, "seq_aux": null, "sliding_window": null, "tensor_parallel": true, "tie_word_embeddings": true, "topk_group": null, "topk_method": null, "torch_dtype": "float32", "transformers_version": "4.51.3", "use_advanced_parallelization": true, "use_bias": false, "use_cache": false, "use_emb_alpha": false, "use_fused_mlp": null, "use_moreh_attention": true, "use_moreh_moe": false, "use_mrope": false, "use_norm_alpha": false, "use_pipeline": false, "use_qk_norm": false, "use_sliding_window": false, "v_proj_alpha": 0.15625, "vocab_size": 219520, "wesar_weights": false }