IvanHU commited on
Commit
d47beaa
·
verified ·
1 Parent(s): 1242dc4

Upload folder using huggingface_hub

Browse files
Files changed (21) hide show
  1. .gitattributes +17 -0
  2. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata +3 -0
  3. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp +3 -0
  4. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp +3 -0
  5. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp +3 -0
  6. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp +3 -0
  7. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp +3 -0
  8. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp +3 -0
  9. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp +3 -0
  10. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp +3 -0
  11. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp +3 -0
  12. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp +3 -0
  13. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp +3 -0
  14. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp +3 -0
  15. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp +3 -0
  16. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp +3 -0
  17. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp +3 -0
  18. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp +3 -0
  19. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt +3 -0
  20. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json +1 -0
  21. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt +1 -0
.gitattributes CHANGED
@@ -423,3 +423,20 @@ model/dev-mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16
423
  model/dev-mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
424
  model/dev-mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
425
  model/dev-mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
423
  model/dev-mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
424
  model/dev-mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
425
  model/dev-mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
426
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata filter=lfs diff=lfs merge=lfs -text
427
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
428
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp filter=lfs diff=lfs merge=lfs -text
429
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
430
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp filter=lfs diff=lfs merge=lfs -text
431
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
432
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp filter=lfs diff=lfs merge=lfs -text
433
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
434
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp filter=lfs diff=lfs merge=lfs -text
435
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
436
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp filter=lfs diff=lfs merge=lfs -text
437
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
438
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp filter=lfs diff=lfs merge=lfs -text
439
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
440
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
441
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
442
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91a7066eae6f56f19891cc471240481e45534af78ba4a15002b4755ffe9450ac
3
+ size 923219
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6c7c606cd2881dcb8b2ca22960d6dabd8bc5653ef66ba72ff910c6601378b53
3
+ size 545101300
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:916ad905fe4adbfdadcfb5e4d2198d4fad1e9848c579dedaab8f37d594189810
3
+ size 545132060
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6b20991e2e2d906ac96320335ef6120ef6a0bca0f256fd0f7d63649af9e3f93
3
+ size 499379568
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef8a395cd9094d814b4c39250007c4989d73d03b07d34c8a3654e195e591f2ca
3
+ size 498618012
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0ef37ab4db8ffc4bc719325dc4edd3d80fe2c042b32799c5125db2e665d407f
3
+ size 498643736
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d50cad17628f08360687777898ca47ef6f01fe410dc29535e206405363d10e4
3
+ size 499459932
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ed5087d26de7a5122401f8a01c918deb3f0a808f8b206449eab4577a8795cfa
3
+ size 498478892
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53327b84e2d2cc4b509f5cf61ff41413a219c4becb758aecbe1887b9fce96135
3
+ size 499459932
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eeb6dc1b762c95d34e885ab356012fb9e8c527a8bcb3cff51b97769a960f048b
3
+ size 498583868
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3a8d187aab7174b58abfd0bd84cb32d870fd6f2ba11a7c56f474ae68397472a
3
+ size 499403292
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ba6079393872e9d11f8db3778717082709aea71f18f917809667268ae188822
3
+ size 544206316
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c073f49b6f3190b678177aa0a8aa936abc5ac2302663ad9304590ac4480e366
3
+ size 544049468
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6990da244fbb7f55c314cb0f761e0077c164daf2f59b9499a533f8ff76ead93
3
+ size 498583868
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5e7fd4fbfbb73a8653e4b77f77264d342d935d62984bd1d12240f3a0eb672d0
3
+ size 499403292
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df16ae18d6d5eda66e4b0516d1418344055ef6d1915447e07de0b1a4a8992891
3
+ size 498422252
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7cde3790b3780b658ed5c36b802bedfc21fb0dad3d8fdb680d2fdc5a9d770ca
3
+ size 498353536
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:641f428a50d616b05e13a04138ca84c6b3b439adb3c564df3393b4ca25f10541
3
+ size 18140
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sharded_backend": "torch_dist", "sharded_backend_version": 1, "common_backend": "torch", "common_backend_version": 1}
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 4768