File size: 2,364 bytes
Commit: ec03c1a
{
  "absolute_position_embedding": false,
  "architectures": [
    "MotifForCausalLM"
  ],
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "configuration_motif.MotifConfig",
    "AutoModelForCausalLM": "modeling_motif.MotifForCausalLM"
  },
  "bfloat16": true,
  "bos_token_id": 219396,
  "continual_training": false,
  "decoder_split_layers": [],
  "decontam_attn": false,
  "dim_model_base": 2048,
  "dim_model_base_attn": 128,
  "dim_model_base_init": 2048,
  "dim_model_base_lmh": 1,
  "dim_model_base_logits": 2048,
  "dim_model_base_lr": 256,
  "down_proj_alpha": 0.15625,
  "embed_tokens_alpha": null,
  "encoder_split_layers": [],
  "eos_token_id": 219395,
  "first_expansion": false,
  "fused_rope": true,
  "gate_up_proj_alpha": 0.15625,
  "hidden_act": "poly_norm",
  "hidden_act_moe": null,
  "hidden_size": 2048,
  "hidden_states_shrink": 0.17677669529663687,
  "init_scale_o": 1,
  "initializer_range": 2e-05,
  "input_layernorm_alpha": null,
  "intermediate_size": 8192,
  "k_proj_alpha": 0.15625,
  "lm_head_alpha": null,
  "loss_reduction": "mean",
  "max_position_embeddings": 16384,
  "max_window_layers": 28,
  "mix_attn": false,
  "model_type": "Motif",
  "moe": false,
  "moe_intermediate_size": null,
  "moe_layer": false,
  "muP": false,
  "multi_token_heads": null,
  "n_group": null,
  "n_routed_experts": null,
  "norm_alpha": null,
  "norm_topk_prob": null,
  "num_attention_heads": 16,
  "num_hidden_layers": 32,
  "num_key_value_heads": 16,
  "num_stages": false,
  "o_proj_alpha": 0.15625,
  "post_attention_layernorm_alpha": null,
  "q_proj_alpha": 0.15625,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 500000.0,
  "routed_scaling_factor": null,
  "scale_emb": 1,
  "scoring_func": null,
  "seq_aux": null,
  "sliding_window": null,
  "tensor_parallel": true,
  "tie_word_embeddings": true,
  "topk_group": null,
  "topk_method": null,
  "torch_dtype": "float32",
  "transformers_version": "4.51.3",
  "use_advanced_parallelization": true,
  "use_bias": false,
  "use_cache": false,
  "use_emb_alpha": false,
  "use_fused_mlp": null,
  "use_moreh_attention": true,
  "use_moreh_moe": false,
  "use_mrope": false,
  "use_norm_alpha": false,
  "use_pipeline": false,
  "use_qk_norm": false,
  "use_sliding_window": false,
  "v_proj_alpha": 0.15625,
  "vocab_size": 219520,
  "wesar_weights": false
}
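
Because `auto_map` routes `AutoConfig` and `AutoModelForCausalLM` to custom code shipped with the checkpoint (`configuration_motif.MotifConfig` and `modeling_motif.MotifForCausalLM`), loading this config through `transformers` requires `trust_remote_code=True`. The sketch below is a minimal illustration under that assumption; the repo ID `org/motif-model` is a placeholder, not the actual checkpoint name.

```python
import torch
from transformers import AutoConfig, AutoModelForCausalLM

# Placeholder repo ID; substitute the real model repository.
repo_id = "org/motif-model"

# auto_map points at custom configuration/modeling modules bundled with the
# checkpoint, so trust_remote_code=True is needed for both calls.
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)

# Sanity check on derived shapes: per-head dimension is
# hidden_size / num_attention_heads = 2048 / 16 = 128, which matches
# dim_model_base_attn above.
assert config.hidden_size // config.num_attention_heads == 128

model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    trust_remote_code=True,
    # The config stores torch_dtype "float32" but also sets "bfloat16": true;
    # loading in bfloat16 is one reasonable choice, not a requirement.
    torch_dtype=torch.bfloat16,
)
```

Note that `num_key_value_heads` equals `num_attention_heads` (16), i.e. standard multi-head attention rather than grouped-query attention, and `tie_word_embeddings: true` means the 219,520-entry input embedding matrix is shared with the LM head.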