{ "absolute_position_embedding": false, "architectures": [ "MotifForCausalLM" ], "attention_dropout": 0.0, "auto_map": { "AutoConfig": "configuration_motif.MotifConfig", "AutoModelForCausalLM": "modeling_motif.MotifForCausalLM" }, "batch_num": null, "bos_token_id": 219396, "continual_training": false, "decoder_split_layers": [], "dim_model_base": 2048, "dim_model_base_attn": 128, "dim_model_base_init": 2048, "dim_model_base_lmh": 1, "dim_model_base_logits": 2048, "dim_model_base_lr": 256, "encoder_split_layers": [], "eos_token_id": 219395, "first_expansion": false, "fused_rope": false, "hidden_act": "poly_norm", "hidden_act_moe": null, "hidden_size": 2048, "hidden_states_shrink": 0.17677669529663687, "init_scale_o": 1, "initializer_range": 2e-05, "intermediate_size": 8192, "loss_reduction": "mean", "max_position_embeddings": 16384, "max_window_layers": 28, "mix_attn": false, "model_type": "Motif", "moe": false, "moe_intermediate_size": null, "moe_layer": false, "muP": false, "multi_token_heads": null, "n_group": null, "n_routed_experts": null, "norm_topk_prob": null, "num_attention_heads": 16, "num_hidden_layers": 32, "num_key_value_heads": 16, "rms_norm_eps": 1e-06, "rope_scaling": null, "rope_theta": 500000.0, "routed_scaling_factor": null, "scale_emb": 1, "scoring_func": null, "seq_aux": null, "sliding_window": null, "tie_word_embeddings": true, "topk_group": null, "topk_method": null, "torch_dtype": "float32", "transformers_version": "4.46.3", "use_advanced_parallelization": false, "use_bias": false, "use_cache": true, "use_fused_mlp": null, "use_moreh_attention": false, "use_moreh_moe": false, "use_mrope": false, "use_qk_norm": false, "use_sliding_window": false, "vocab_size": 219520, "wesar_weights": false }