IvanHU commited on
Commit
d69a58c
·
verified ·
1 Parent(s): 87e4ae4

Upload folder using huggingface_hub

Browse files
Files changed (22) hide show
  1. .gitattributes +17 -0
  2. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata +3 -0
  3. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp +3 -0
  4. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp +3 -0
  5. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp +3 -0
  6. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp +3 -0
  7. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp +3 -0
  8. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp +3 -0
  9. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp +3 -0
  10. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp +3 -0
  11. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp +3 -0
  12. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp +3 -0
  13. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp +3 -0
  14. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp +3 -0
  15. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp +3 -0
  16. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp +3 -0
  17. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp +3 -0
  18. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp +3 -0
  19. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt +3 -0
  20. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json +1 -0
  21. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt +1 -0
  22. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/linked_runs.txt +5 -0
.gitattributes CHANGED
@@ -389,3 +389,20 @@ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-102
389
  model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
390
  model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
391
  model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
389
  model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
390
  model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
391
  model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
392
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata filter=lfs diff=lfs merge=lfs -text
393
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
394
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp filter=lfs diff=lfs merge=lfs -text
395
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
396
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp filter=lfs diff=lfs merge=lfs -text
397
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
398
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp filter=lfs diff=lfs merge=lfs -text
399
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
400
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp filter=lfs diff=lfs merge=lfs -text
401
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
402
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp filter=lfs diff=lfs merge=lfs -text
403
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
404
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp filter=lfs diff=lfs merge=lfs -text
405
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
406
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
407
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
408
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6a8c28719b32e3ced6f2e668f9ee728d9371e18ed9ed36b857d285db838631a
3
+ size 1515517
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77507f796bd530fdf23f06fcbbf101aa5ba1e80da09049ae96c81559927db406
3
+ size 509615457
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06792cafe0fdf939fe5dcee48b5e6c4131d58efee703be4afc0405476421a99c
3
+ size 509638215
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69ee6cbc3ee7cf4189497c11a9cf72883f9d7dbef74b9a1dc70195ca54acc823
3
+ size 509514158
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf3ffcd890f40ccab69a69be28893cc93732c9251d36af5112bf3fdbe0cf4dac
3
+ size 509545117
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2526614977a874c765cc63bb7d90dd1668d943813f600eee92e56210a74c7784
3
+ size 509488525
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f3bcb8a9545646563953b3683da0e46c7a6aa753eb07f7878edf857d1e850b3
3
+ size 509496410
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a4874c7b74de2bf5eb2fe2f9f5fb2d38c468b5e61b446f3bd28ce9388b586b9
3
+ size 509367774
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1239782a2341bb8fbed3f9c5167baa2fc55e5c5d016496fa52dac6874c2b341f
3
+ size 509353581
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58df84dfb3267e4956971d76aac5cda24305afbd0da28084d51f3e557a9060ff
3
+ size 509409739
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4b3ad311a3949ad14ae9f35a06d1ce74fa27ca9d9e80ee583105474e0069a92
3
+ size 509422291
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d92533b7459cf47a1c0d15a30bd5f62ce629cbb03abfe7391a5be360fb1dbcee
3
+ size 509274795
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0a826e1af28fe749593a6d1663b806145038b6685b3a73d5861a9b917efb9c6
3
+ size 509309425
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce60988b6dbce94d7842c5764be43b52696102c96320fa4137f779c95f143a1b
3
+ size 509529558
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acbccc9419bddd09aa96b64688d07f0d5b0d47abe74c1ba2fadb2f74ab27239b
3
+ size 509499531
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:617c6d7a27ae9977f8d65bb36059d8f405463a7e126527f1d094f8779f3813ac
3
+ size 509267230
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c4c08b77dc0deb157cc1e7c7acccff956572c2da23925a279d2b493577e8de2
3
+ size 509295552
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45ea928cc9602cc9e65203bba3959cb15389a3d6584c35758c54be358418f804
3
+ size 18663
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sharded_backend": "torch_dist", "sharded_backend_version": 1, "common_backend": "torch", "common_backend_version": 1}
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 4768
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/linked_runs.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ 2025.05.23-02.10.23
2
+ 2025.05.23-02.12.28
3
+ 2025.05.23-02.32.48
4
+ 2025.05.23-02.36.40
5
+ 2025.05.23-02.39.49