IvanHU committed on
Commit
b7e86c0
·
verified ·
1 Parent(s): f68b790

Upload folder using huggingface_hub

Browse files
Files changed (22) hide show
  1. .gitattributes +17 -0
  2. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata +3 -0
  3. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp +3 -0
  4. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp +3 -0
  5. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp +3 -0
  6. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp +3 -0
  7. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp +3 -0
  8. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp +3 -0
  9. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp +3 -0
  10. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp +3 -0
  11. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp +3 -0
  12. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp +3 -0
  13. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp +3 -0
  14. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp +3 -0
  15. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp +3 -0
  16. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp +3 -0
  17. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp +3 -0
  18. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp +3 -0
  19. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt +3 -0
  20. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json +1 -0
  21. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt +1 -0
  22. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/linked_runs.txt +1 -0
.gitattributes CHANGED
@@ -712,3 +712,20 @@ model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep
712
  model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-4e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
713
  model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-4e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
714
  model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-4e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
712
  model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-4e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
713
  model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-4e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
714
  model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-4e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
715
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata filter=lfs diff=lfs merge=lfs -text
716
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
717
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp filter=lfs diff=lfs merge=lfs -text
718
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
719
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp filter=lfs diff=lfs merge=lfs -text
720
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
721
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp filter=lfs diff=lfs merge=lfs -text
722
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
723
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp filter=lfs diff=lfs merge=lfs -text
724
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
725
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp filter=lfs diff=lfs merge=lfs -text
726
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
727
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp filter=lfs diff=lfs merge=lfs -text
728
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
729
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
730
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
731
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c25f85b1b4c8ae775c22e4fb473716aa53daa5d539fc02f190140cf2919a259
3
+ size 2400622
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88c036ac3856e0b5b527f7ae130736322402f2360efa575abfbc02db9b135822
3
+ size 908160098
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03027fb2795306271f1106327590a4d7e210b21be519f3ef399dc760e452f5cd
3
+ size 908241205
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d706ffd8388725837758a1c5eaacb144ae3fe5dab6fc94c62198820807be301
3
+ size 878369213
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8293fbc42096d6c7eed35ad65451c26a9d3dd20b0b9d28650354e3392d614202
3
+ size 878367055
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7c5987eb86270c1e654cb4912afe47b28710eb0961ad56eef8b55cbd24786f2
3
+ size 878886700
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b3ab2f8bf010a7fa7fec2750114097bd0c8684cacaf73292381f5d22fc069c8
3
+ size 878899316
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad650b3eb7eace6c623171429c9fed260dea902e5e8f6205464b5bb416db0fa5
3
+ size 878093364
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4758ab7528a66009e6533aa744638827e33cfe48c306da0b793d9817a164e3b3
3
+ size 878074440
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69c51639bf4fc0603293a1299584ae8512f287ae14b75b90dd554bcf3dbee385
3
+ size 878716384
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a7ba0c0d8b2d57b096dc1b2f8dfe47cc7da9e8c21508103efe86c65673a9372
3
+ size 878710076
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9c4d965a09a7641727892f9cc2fc7c3679c978ab792a752fea8bdf676701e77
3
+ size 907368038
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfed9a42b804050e7e7d65cf94691093a7c1247c6a3f9e1b42d223a2101429f8
3
+ size 907181644
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1124dd95a680024ff31f02e15988e905745b1352bd130de600487a306c0ec950
3
+ size 878711653
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:293c2416aee0bc1beee5aa30ef33349cf2e4302386466a1f919bf6f29d6558af
3
+ size 878730513
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e207801344c30e4b55f6b66fd3a8f126303af3a62cc53af06e14449f40c74a9
3
+ size 877955101
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d451ead28f2fce08b92bcea7d81d9f838970e5eed23a75ce3f679934a46a2a31
3
+ size 878007142
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f36f5ebea1038ae442ffdb9c70b91a6cc7551170bc2d0a6632e6d5ac3e1ac15
3
+ size 19239
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sharded_backend": "torch_dist", "sharded_backend_version": 1, "common_backend": "torch", "common_backend_version": 1}
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 4768
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-1e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/linked_runs.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 2025.05.30-00.54.24