Qwen
/

Text Generation
Transformers
Safetensors
qwen3_moe
conversational

Enable YaRN RoPE scaling to extend the context length to 122,880 tokens (3.0 × 40,960)

#41
by nbroad - opened
Files changed (1) hide show
  1. config.json +5 -1
config.json CHANGED
@@ -25,7 +25,11 @@
25
  "num_key_value_heads": 4,
26
  "output_router_logits": false,
27
  "rms_norm_eps": 1e-06,
28
- "rope_scaling": null,
 
 
 
 
29
  "rope_theta": 1000000.0,
30
  "router_aux_loss_coef": 0.001,
31
  "sliding_window": null,
 
25
  "num_key_value_heads": 4,
26
  "output_router_logits": false,
27
  "rms_norm_eps": 1e-06,
28
+ "rope_scaling": {
29
+ "rope_type": "yarn",
30
+ "factor": 3.0,
31
+ "original_max_position_embeddings": 40960
32
+ },
33
  "rope_theta": 1000000.0,
34
  "router_aux_loss_coef": 0.001,
35
  "sliding_window": null,