Qwen
/

Text Generation
Transformers
Safetensors
qwen3_moe
conversational
littlebird13 committed on
Commit
410ce04
·
verified ·
1 Parent(s): 2674a51

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. config.json +38 -0
  3. merges.txt +0 -0
  4. model-00001-of-00095.safetensors +3 -0
  5. model-00002-of-00095.safetensors +3 -0
  6. model-00003-of-00095.safetensors +3 -0
  7. model-00004-of-00095.safetensors +3 -0
  8. model-00005-of-00095.safetensors +3 -0
  9. model-00006-of-00095.safetensors +3 -0
  10. model-00007-of-00095.safetensors +3 -0
  11. model-00008-of-00095.safetensors +3 -0
  12. model-00009-of-00095.safetensors +3 -0
  13. model-00010-of-00095.safetensors +3 -0
  14. model-00011-of-00095.safetensors +3 -0
  15. model-00012-of-00095.safetensors +3 -0
  16. model-00013-of-00095.safetensors +3 -0
  17. model-00014-of-00095.safetensors +3 -0
  18. model-00015-of-00095.safetensors +3 -0
  19. model-00016-of-00095.safetensors +3 -0
  20. model-00017-of-00095.safetensors +3 -0
  21. model-00018-of-00095.safetensors +3 -0
  22. model-00019-of-00095.safetensors +3 -0
  23. model-00020-of-00095.safetensors +3 -0
  24. model-00021-of-00095.safetensors +3 -0
  25. model-00022-of-00095.safetensors +3 -0
  26. model-00023-of-00095.safetensors +3 -0
  27. model-00024-of-00095.safetensors +3 -0
  28. model-00025-of-00095.safetensors +3 -0
  29. model-00026-of-00095.safetensors +3 -0
  30. model-00027-of-00095.safetensors +3 -0
  31. model-00028-of-00095.safetensors +3 -0
  32. model-00029-of-00095.safetensors +3 -0
  33. model-00030-of-00095.safetensors +3 -0
  34. model-00031-of-00095.safetensors +3 -0
  35. model-00032-of-00095.safetensors +3 -0
  36. model-00033-of-00095.safetensors +3 -0
  37. model-00034-of-00095.safetensors +3 -0
  38. model-00035-of-00095.safetensors +3 -0
  39. model-00036-of-00095.safetensors +3 -0
  40. model-00037-of-00095.safetensors +3 -0
  41. model-00038-of-00095.safetensors +3 -0
  42. model-00039-of-00095.safetensors +3 -0
  43. model-00040-of-00095.safetensors +3 -0
  44. model-00041-of-00095.safetensors +3 -0
  45. model-00042-of-00095.safetensors +3 -0
  46. model-00043-of-00095.safetensors +3 -0
  47. model-00044-of-00095.safetensors +3 -0
  48. model-00045-of-00095.safetensors +3 -0
  49. model-00046-of-00095.safetensors +3 -0
  50. model-00047-of-00095.safetensors +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3MoeForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "decoder_sparse_step": 1,
9
+ "eos_token_id": 151645,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 4096,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 12288,
15
+ "max_position_embeddings": 40960,
16
+ "max_window_layers": 94,
17
+ "mlp_only_layers": [],
18
+ "model_type": "qwen3_moe",
19
+ "moe_intermediate_size": 1536,
20
+ "norm_topk_prob": true,
21
+ "num_attention_heads": 64,
22
+ "num_experts": 128,
23
+ "num_experts_per_tok": 8,
24
+ "num_hidden_layers": 94,
25
+ "num_key_value_heads": 4,
26
+ "output_router_logits": false,
27
+ "rms_norm_eps": 1e-06,
28
+ "rope_scaling": null,
29
+ "rope_theta": 1000000.0,
30
+ "router_aux_loss_coef": 0.001,
31
+ "sliding_window": null,
32
+ "tie_word_embeddings": false,
33
+ "torch_dtype": "float16",
34
+ "transformers_version": "4.51.0",
35
+ "use_cache": true,
36
+ "use_sliding_window": false,
37
+ "vocab_size": 151936
38
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f71e6ce18cb21c750f77145de23609ac0f15cb21493fb347c6e6f2327aaf1ba7
3
+ size 4609558401
model-00002-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e722858c98d928427a9c46e43ab0ac8d58875c7e22bfe254d6dc726cf868f421
3
+ size 4975511448
model-00003-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:633605c97abc2c934680a5f8292e7e9cef730a6dd22cc9cc054787c5f7128a14
3
+ size 4975511448
model-00004-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a07aa8672b4d5ee60688fd28d9ff354581007bc2bbb5b7199de8dc5cf87c2d41
3
+ size 4975511448
model-00005-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10c1050b569aa836e168ac7af557607b735b4d2a89d2abd5e846ca7c340a83f0
3
+ size 4975511426
model-00006-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91acd08b71512b28622286b616ce75a87727d16a812d9f299b4d65ef44a0ea15
3
+ size 4975511448
model-00007-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcff27fafc755f702f462e4ba79dc9ad5cdb1ceb2fb2a726492b521413079f73
3
+ size 4975511448
model-00008-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80e6ba7aef3dba3f35865a68e0bc902765eb4aa9e4c4ff58051f3332e8a0c192
3
+ size 4975511448
model-00009-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f207253a633868a5f2c440a790925e39ea27013c34b879f8632dcbcb0b9d62c2
3
+ size 4975511448
model-00010-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bd546c6c17be5c0b3b8c900865e634b9769998bc63a07d222a03c232d83d489
3
+ size 4975511442
model-00011-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef4dcaebdf6ccc7101fb88ff7b80cbe7f898e56d4fd548c0a7c6869a4c98bfb8
3
+ size 4975511459
model-00012-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3eefd056c0f9901ca9e3f88ec62622c6b69a20673c8444cd3aaaebd1de4de1fb
3
+ size 4975511460
model-00013-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ba5a6c69dad2935a3ed0f9564a6731028ee4907755ef6542183e5d463ebddad
3
+ size 4975511460
model-00014-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e2116b7eb4bc225f0de2856fd8289b12c6ac5692802445a2ec61cd824b932d6
3
+ size 4975511460
model-00015-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b4f81ba23736b02717e39a07e9d4edc7eb41615410a7a367696b5714cd4b9ed
3
+ size 4975511460
model-00016-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0998a9d72fb3f6cf42cad851900c05a54bc16e331ae6852b9117c4f3b6df6627
3
+ size 4975511430
model-00017-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb79897637c0f9f28383538adf8132791b4ffbd3e5686fa6a57679c7e281142b
3
+ size 4975511448
model-00018-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c923a71e9bca010c76cf7c7a656ed0f00b9be4642aa1dfe5e2c55c26642a949b
3
+ size 4975511460
model-00019-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0159a7fda7844f6dca842f6be5f17d118b805f071808875032bcca744ad5d94
3
+ size 4975511460
model-00020-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75f399169e26a677def91bcff33bf80512c0a49942aab84b00456763ff754704
3
+ size 4975511460
model-00021-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9e6d2f3cece39d7ffff65aa9c64c51d7e16a58a12abe0229378c016bd52089a
3
+ size 4975511460
model-00022-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af6856693430e86b931366e34b2651449a3e495eeefbc64615df00335fed8125
3
+ size 4975511460
model-00023-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b64d9b4cf23f2d5cf5daa83f76bdb6ad8bd2df17f4a956c4a1d806ef5bab87e4
3
+ size 4975511460
model-00024-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:602cf76ea03d602f10ac04d1652c0d630dd835778ab28114db6c9ac856bf28f6
3
+ size 4975511460
model-00025-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57299acd4aa0fac8447f0589325cdb8ca24076b68ab131e08d2b82d06d400cf4
3
+ size 4975511436
model-00026-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edec74547cbc45026bfc8eceed613e58c3237eb73b0974514ad9ca0eab45ab01
3
+ size 4975511460
model-00027-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08e56b42d542c053ffe8a7cc2ccfc93bbe097ac9a8542591ef65efdd71c7c81c
3
+ size 4975511460
model-00028-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55326e133c56fb5f7f9c72e0ace1f167329c30db0c6f0f27ad8b8628176e212b
3
+ size 4975511460
model-00029-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:816359d22d7d2aba6d814ca6c34fb8b9cf0933f9e049b045431c43457eaa4ad6
3
+ size 4975511460
model-00030-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:768b96b335ce23221899420d3e858c81bd9fceb8574b630eb919cdc60f967c48
3
+ size 4975511460
model-00031-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1e5ad08c567a35b8d0c83660fee9a9f89ed202ffbbaf7159bfd3a3a2c64fbd6
3
+ size 4975511448
model-00032-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:737199e15ed65319db7a036277e2ec93c562dcdcfbfc15016af0bcd8cb120ba6
3
+ size 4975511438
model-00033-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a70591d2d0b68671a2e64b2116970abdf43f1e0e187530e0ce3ef6100f272efc
3
+ size 4975511438
model-00034-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf58ca02dec615be52f5e44719693565150f5e509be1b2eb3abaabc72efdb1f3
3
+ size 4975511460
model-00035-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfb670243e77279d8b374bc67d9d75adaf4271b29ff7e065d8ae02a1653cb795
3
+ size 4975511460
model-00036-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9144a32f7d41355c7d80cdede43699e3fe6d5e9dd7eece71c31e9c5aa4347c28
3
+ size 4975511460
model-00037-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68f2e9e4608407ff75ab5f4b03cd20b127ead61a47577ec4490976781616c026
3
+ size 4975511448
model-00038-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:368d72ae2b6998300ff95bc19b86345be0a80e679cf145b40cda06fbf7d1296b
3
+ size 4975511460
model-00039-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccf7385ed8ce0f89f00e75f80c62fed2b49e87d6e9bbca0801a7565d19403cfb
3
+ size 4975511460
model-00040-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b3d9bc08b9ef178e7b4cb937ce3b54ca3adefc7db6dff57bb7b66f0a36aa5d8
3
+ size 4975511460
model-00041-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a74a157772a3188a64593d20a021d0d60cf787fdb53226e2516137531687261d
3
+ size 4975511460
model-00042-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1202865d7b0d992eff0869f0da108b6f8d99c8c37c25c060deb92bc2a73f24c0
3
+ size 4975511454
model-00043-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2868b1f3e0cf4beca0913e559676564e516d2d3dde57cbda28bdf2ac464c7ac6
3
+ size 4975511460
model-00044-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad1928e9d787f26d105f10b2dc9d580c9663d76d7140376230d7b1c3f6ff6ff3
3
+ size 4975511454
model-00045-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e69f22b3da7e9573bb723ce25cf0c77344a064f6a5029397ee7d0acd32bf7756
3
+ size 4975511460
model-00046-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ede5d2a48409d54a97229f0f599d05d1151901f2d4e30d2dee63bf46138af2cc
3
+ size 4975511460
model-00047-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2222c6a1cfc6b25d6142e6a5409cd66015024fffe7ea5d2fdbe4d06f4486182
3
+ size 4975511460