IvanHU committed on
Commit
a1d2ec9
·
verified ·
1 Parent(s): f5bfd89

Upload folder using huggingface_hub

Browse files
Files changed (22) hide show
  1. .gitattributes +17 -0
  2. model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata +3 -0
  3. model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp +3 -0
  4. model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp +3 -0
  5. model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp +3 -0
  6. model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp +3 -0
  7. model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp +3 -0
  8. model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp +3 -0
  9. model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp +3 -0
  10. model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp +3 -0
  11. model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp +3 -0
  12. model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp +3 -0
  13. model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp +3 -0
  14. model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp +3 -0
  15. model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp +3 -0
  16. model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp +3 -0
  17. model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp +3 -0
  18. model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp +3 -0
  19. model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt +3 -0
  20. model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json +1 -0
  21. model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt +1 -0
  22. model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/linked_runs.txt +1 -0
.gitattributes CHANGED
@@ -593,3 +593,20 @@ model/dev-muon-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp
593
  model/dev-muon-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
594
  model/dev-muon-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
595
  model/dev-muon-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
593
  model/dev-muon-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
594
  model/dev-muon-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
595
  model/dev-muon-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
596
+ model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata filter=lfs diff=lfs merge=lfs -text
597
+ model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
598
+ model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp filter=lfs diff=lfs merge=lfs -text
599
+ model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
600
+ model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp filter=lfs diff=lfs merge=lfs -text
601
+ model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
602
+ model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp filter=lfs diff=lfs merge=lfs -text
603
+ model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
604
+ model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp filter=lfs diff=lfs merge=lfs -text
605
+ model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
606
+ model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp filter=lfs diff=lfs merge=lfs -text
607
+ model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
608
+ model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp filter=lfs diff=lfs merge=lfs -text
609
+ model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
610
+ model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
611
+ model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
612
+ model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e2a73ffe3d94a24009ef3a12f7ed0c2c30e1497ddf1af444a76957ffb19edab
3
+ size 1145762
model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc3b916f0a06115967c1316f8b3629379d976a43ff33cfd4c1799279b7b06041
3
+ size 687946575
model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:013bbc0089cc2b0c2af1105506619aa6e50a930482c8d49e7c768047c71ba794
3
+ size 687944002
model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:723e58394fabf0d2376bf9b34e375c47893db49379bcf17d23a3f3c04251d532
3
+ size 687423642
model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53fbe9e3ab1bd53f45e6490a1f286ddd4ad5beaa4361df8f3822fd6a875bb4be
3
+ size 687437254
model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b45297e9983f5f9f3df05e9d15acf60aa9a6a804f5e856f457eef4e7a8b3a60
3
+ size 687641060
model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82921096cb8c6c50f8d47ade838c2703c477e0aeb09af5efc82025475c8213c5
3
+ size 687662457
model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c5dc26f1648bf5ea905eb8c8dac35f463b1925f7b148566c7ab5b0c0a756930
3
+ size 687238284
model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49f7c0078c96287f7d32e4f239e17202805c5c6af6e06d87ade70672def4eb21
3
+ size 687579804
model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:277fade330728d0feb586db9119e08de35339daec651ec9e2a6b64c80df810f3
3
+ size 721091665
model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:843fe207f7f2f81226d06ab019b91b1474121329f48561df3de40c94b4c79a25
3
+ size 720935490
model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6137ba28748d00faff9a9daef231f1aab2d35396335a6f8d395215e93ba2e469
3
+ size 719517224
model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:934ade5ed6717dd90634e393bc949de8d8b535c55b2bc53765b11f9e7f77c828
3
+ size 720935490
model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e677bf956b71d951352b4264d82e45f276e2fe04d3d986118b5ef9bf5539bda7
3
+ size 687238436
model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:518af4027749f9ebe339c58180825a90da0b37c1a53e35b1a695a55be06dbda3
3
+ size 687842980
model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9042af67a18de2215b10b70031dce67fe205edaac946e6bb2fa03388630c0bab
3
+ size 686847861
model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44fb7d2c020e82e14791aa7671408e929f0f3ecfa18231fd3ed08aa7d6a0954b
3
+ size 686792827
model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f711f7cfc9b47272efd5bf2a9a889778aa2f7fb0d2ebdc857e5930096f0885b
3
+ size 19111
model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sharded_backend": "torch_dist", "sharded_backend_version": 1, "common_backend": "torch", "common_backend_version": 1}
model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 4768
model/dev-muon-gate-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/linked_runs.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 2025.05.16-01.14.04