IvanHU committed on
Commit
b1d4e38
·
verified ·
1 Parent(s): 518dabf

Upload folder using huggingface_hub

Browse files
Files changed (21) hide show
  1. .gitattributes +17 -0
  2. model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata +3 -0
  3. model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp +3 -0
  4. model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp +3 -0
  5. model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp +3 -0
  6. model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp +3 -0
  7. model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp +3 -0
  8. model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp +3 -0
  9. model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp +3 -0
  10. model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp +3 -0
  11. model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp +3 -0
  12. model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp +3 -0
  13. model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp +3 -0
  14. model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp +3 -0
  15. model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp +3 -0
  16. model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp +3 -0
  17. model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp +3 -0
  18. model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp +3 -0
  19. model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt +3 -0
  20. model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json +1 -0
  21. model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt +1 -0
.gitattributes CHANGED
@@ -117,3 +117,20 @@ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr
117
  model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__8_1.distcp filter=lfs diff=lfs merge=lfs -text
118
  model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__9_0.distcp filter=lfs diff=lfs merge=lfs -text
119
  model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__9_1.distcp filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__8_1.distcp filter=lfs diff=lfs merge=lfs -text
118
  model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__9_0.distcp filter=lfs diff=lfs merge=lfs -text
119
  model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__9_1.distcp filter=lfs diff=lfs merge=lfs -text
120
+ model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata filter=lfs diff=lfs merge=lfs -text
121
+ model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
122
+ model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp filter=lfs diff=lfs merge=lfs -text
123
+ model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
124
+ model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp filter=lfs diff=lfs merge=lfs -text
125
+ model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
126
+ model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp filter=lfs diff=lfs merge=lfs -text
127
+ model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
128
+ model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp filter=lfs diff=lfs merge=lfs -text
129
+ model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
130
+ model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp filter=lfs diff=lfs merge=lfs -text
131
+ model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
132
+ model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp filter=lfs diff=lfs merge=lfs -text
133
+ model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
134
+ model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
135
+ model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
136
+ model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0ba5d06568e799d5894878422adbb5fbccee3f0abf84ee0707d15cc1397830d
3
+ size 926502
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cf9b4a5882930d993987dc24dba87c9bcc7e2b096c52e71415bf03aad0b0d3c
3
+ size 545101300
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b18e80039d7b4bdb1a01c988f039da83e51adab2d7ec637013935d4a2bc70424
3
+ size 545132060
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1814572d6b20815b34030e5cc065cbda4b3669f6a7788a3d96eb0c552ee1909b
3
+ size 499379568
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dae618ccdb13be2a48b0778e398053358f1a5e0c73a37c1fbe3a1f61ba15ea9d
3
+ size 498618012
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70ed61723a6b6795b5735d6ed1545c408b9a838e13c54f437a3808b711c65609
3
+ size 498643736
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d89fcc73312ed983a9597007b9ad19fb689c3b87927a34a2da9c865631458095
3
+ size 499459932
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f3d17b0771338daa9b875da3a5dd9c67d9db74a06629f6064861bedd0d4b3b6
3
+ size 498478892
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b00d995cad20fe85a433cf88c3b8a938db7f735bbc0614c6b82533884b39cff0
3
+ size 499459932
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:defea3a193c731bf943886b1ad0ee7d568e5f1fcf8a163d57ec1e0f04179204e
3
+ size 498591332
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c0f0a7e02da1abf74084cecb914449ad282cc3bc1f90de46dc64037380fc3dd
3
+ size 499403292
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a035213ae9d0463ea170bc6eb9de5e35ced7fbd29d1256c16259b31e4914418f
3
+ size 544206316
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9db739f8f7bc81ac143803e91a224cd587b10847a3edfa14592de889cbb87fac
3
+ size 544049468
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c8bb35492a3d83cc376148a3bb87b8b282613ff5b2ca309f815d4a63c2c0b46
3
+ size 498591332
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6207244d50ff097df6856e9324a7acb303127817cb6c85d52ff09fbff8a1b3a
3
+ size 499403292
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e0328f5b705f993bd68886d7fb0f44fec9bcb7ec4da05d39ff326cc3e4fa034
3
+ size 498422252
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83a91a984c17a761615efb222f71f019daa00d1aa0ca479d21b7c39c782d30b0
3
+ size 498353536
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a413e328551aa05ee5d4726b8845408099aaefca5a433824bad777fc4dfb4548
3
+ size 18012
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sharded_backend": "torch_dist", "sharded_backend_version": 1, "common_backend": "torch", "common_backend_version": 1}
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 4768