Upload Qwen3MoeForCausalLM

Files changed (10) hide show

config.json ADDED Viewed

+{
+  "architectures": [
+    "Qwen3MoeForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "decoder_sparse_step": 1,
+  "eos_token_id": 151645,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 6144,
+  "max_position_embeddings": 40960,
+  "max_window_layers": 48,
+  "mlp_only_layers": [],
+  "model_type": "qwen3_moe",
+  "moe_intermediate_size": 768,
+  "norm_topk_prob": true,
+  "num_attention_heads": 32,
+  "num_experts": 128,
+  "num_experts_per_tok": 8,
+  "num_hidden_layers": 48,
+  "num_key_value_heads": 4,
+  "output_router_logits": false,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "router_aux_loss_coef": 0.001,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float16",
+  "transformers_version": "4.51.3",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}

generation_config.json ADDED Viewed

+{
+  "bos_token_id": 151643,
+  "do_sample": true,
+  "eos_token_id": [
+    151645,
+    151643
+  ],
+  "pad_token_id": 151643,
+  "temperature": 0.6,
+  "top_k": 20,
+  "top_p": 0.95,
+  "transformers_version": "4.51.3"
+}

model-00001-of-00007.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:bcea13a010bcd8df535deae214a1cf0a0a98328f888f6d6c802db2c7c67fdd09
+size 4999936064

model-00002-of-00007.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:a2532e5e6508851985f0795b2777bcf905027874244850d58290d15f327ce725
+size 5000053664

model-00003-of-00007.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:ad3136587864e271b38b083d326b36b568ffc389ed7d0c40e40916883e9cf804
+size 5000055320

model-00004-of-00007.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:b2faeb4b925dff9d300cd8cc6a869c02d8824ddf5a4db8cb729abd54fc78af9d
+size 5000055312

model-00005-of-00007.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:c5bfed1a7f8aea81b470858e0dda25da29fa2b172e0c9873c6acc0df38fa457a
+size 4991667744

model-00006-of-00007.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:248146f34ddd5062bee68e4d909d251ea727fc5d20025c79088a93fd1205f90c
+size 5000046808

model-00007-of-00007.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:e840822c5b4526c8d122a8c0cdfc73bf2d99638f5b49ea94286f5ee60e638856
+size 2687694912

model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff