Upload folder using huggingface_hub
Browse files- .gitattributes +17 -0
- model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata +3 -0
- model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp +3 -0
- model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp +3 -0
- model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp +3 -0
- model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp +3 -0
- model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp +3 -0
- model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp +3 -0
- model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp +3 -0
- model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp +3 -0
- model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp +3 -0
- model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp +3 -0
- model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp +3 -0
- model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp +3 -0
- model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp +3 -0
- model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp +3 -0
- model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp +3 -0
- model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp +3 -0
- model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt +3 -0
- model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json +1 -0
- model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt +1 -0
- model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/linked_runs.txt +5 -0
.gitattributes
CHANGED
|
@@ -389,3 +389,20 @@ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-102
|
|
| 389 |
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 390 |
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 391 |
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 389 |
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 390 |
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 391 |
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 392 |
+
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata filter=lfs diff=lfs merge=lfs -text
|
| 393 |
+
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 394 |
+
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 395 |
+
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 396 |
+
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 397 |
+
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 398 |
+
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 399 |
+
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 400 |
+
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 401 |
+
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 402 |
+
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 403 |
+
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 404 |
+
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 405 |
+
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 406 |
+
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 407 |
+
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 408 |
+
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
|
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e6a8c28719b32e3ced6f2e668f9ee728d9371e18ed9ed36b857d285db838631a
|
| 3 |
+
size 1515517
|
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:77507f796bd530fdf23f06fcbbf101aa5ba1e80da09049ae96c81559927db406
|
| 3 |
+
size 509615457
|
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:06792cafe0fdf939fe5dcee48b5e6c4131d58efee703be4afc0405476421a99c
|
| 3 |
+
size 509638215
|
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:69ee6cbc3ee7cf4189497c11a9cf72883f9d7dbef74b9a1dc70195ca54acc823
|
| 3 |
+
size 509514158
|
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cf3ffcd890f40ccab69a69be28893cc93732c9251d36af5112bf3fdbe0cf4dac
|
| 3 |
+
size 509545117
|
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2526614977a874c765cc63bb7d90dd1668d943813f600eee92e56210a74c7784
|
| 3 |
+
size 509488525
|
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f3bcb8a9545646563953b3683da0e46c7a6aa753eb07f7878edf857d1e850b3
|
| 3 |
+
size 509496410
|
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a4874c7b74de2bf5eb2fe2f9f5fb2d38c468b5e61b446f3bd28ce9388b586b9
|
| 3 |
+
size 509367774
|
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1239782a2341bb8fbed3f9c5167baa2fc55e5c5d016496fa52dac6874c2b341f
|
| 3 |
+
size 509353581
|
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:58df84dfb3267e4956971d76aac5cda24305afbd0da28084d51f3e557a9060ff
|
| 3 |
+
size 509409739
|
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a4b3ad311a3949ad14ae9f35a06d1ce74fa27ca9d9e80ee583105474e0069a92
|
| 3 |
+
size 509422291
|
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d92533b7459cf47a1c0d15a30bd5f62ce629cbb03abfe7391a5be360fb1dbcee
|
| 3 |
+
size 509274795
|
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a0a826e1af28fe749593a6d1663b806145038b6685b3a73d5861a9b917efb9c6
|
| 3 |
+
size 509309425
|
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ce60988b6dbce94d7842c5764be43b52696102c96320fa4137f779c95f143a1b
|
| 3 |
+
size 509529558
|
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:acbccc9419bddd09aa96b64688d07f0d5b0d47abe74c1ba2fadb2f74ab27239b
|
| 3 |
+
size 509499531
|
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:617c6d7a27ae9977f8d65bb36059d8f405463a7e126527f1d094f8779f3813ac
|
| 3 |
+
size 509267230
|
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c4c08b77dc0deb157cc1e7c7acccff956572c2da23925a279d2b493577e8de2
|
| 3 |
+
size 509295552
|
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:45ea928cc9602cc9e65203bba3959cb15389a3d6584c35758c54be358418f804
|
| 3 |
+
size 18663
|
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"sharded_backend": "torch_dist", "sharded_backend_version": 1, "common_backend": "torch", "common_backend_version": 1}
|
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
4768
|
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/linked_runs.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025.05.23-02.10.23
|
| 2 |
+
2025.05.23-02.12.28
|
| 3 |
+
2025.05.23-02.32.48
|
| 4 |
+
2025.05.23-02.36.40
|
| 5 |
+
2025.05.23-02.39.49
|