Upload folder using huggingface_hub
Browse files- .gitattributes +17 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json +1 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt +1 -0
.gitattributes
CHANGED
|
@@ -117,3 +117,20 @@ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr
|
|
| 117 |
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__8_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 118 |
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__9_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 119 |
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__9_1.distcp filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__8_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 118 |
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__9_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 119 |
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__9_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 120 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata filter=lfs diff=lfs merge=lfs -text
|
| 121 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 122 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 123 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 124 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 125 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 126 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 127 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 128 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 129 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 130 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 131 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 132 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 133 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 134 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 135 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 136 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
|
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0ba5d06568e799d5894878422adbb5fbccee3f0abf84ee0707d15cc1397830d
|
| 3 |
+
size 926502
|
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8cf9b4a5882930d993987dc24dba87c9bcc7e2b096c52e71415bf03aad0b0d3c
|
| 3 |
+
size 545101300
|
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b18e80039d7b4bdb1a01c988f039da83e51adab2d7ec637013935d4a2bc70424
|
| 3 |
+
size 545132060
|
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1814572d6b20815b34030e5cc065cbda4b3669f6a7788a3d96eb0c552ee1909b
|
| 3 |
+
size 499379568
|
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dae618ccdb13be2a48b0778e398053358f1a5e0c73a37c1fbe3a1f61ba15ea9d
|
| 3 |
+
size 498618012
|
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:70ed61723a6b6795b5735d6ed1545c408b9a838e13c54f437a3808b711c65609
|
| 3 |
+
size 498643736
|
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d89fcc73312ed983a9597007b9ad19fb689c3b87927a34a2da9c865631458095
|
| 3 |
+
size 499459932
|
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f3d17b0771338daa9b875da3a5dd9c67d9db74a06629f6064861bedd0d4b3b6
|
| 3 |
+
size 498478892
|
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b00d995cad20fe85a433cf88c3b8a938db7f735bbc0614c6b82533884b39cff0
|
| 3 |
+
size 499459932
|
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:defea3a193c731bf943886b1ad0ee7d568e5f1fcf8a163d57ec1e0f04179204e
|
| 3 |
+
size 498591332
|
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c0f0a7e02da1abf74084cecb914449ad282cc3bc1f90de46dc64037380fc3dd
|
| 3 |
+
size 499403292
|
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a035213ae9d0463ea170bc6eb9de5e35ced7fbd29d1256c16259b31e4914418f
|
| 3 |
+
size 544206316
|
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9db739f8f7bc81ac143803e91a224cd587b10847a3edfa14592de889cbb87fac
|
| 3 |
+
size 544049468
|
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6c8bb35492a3d83cc376148a3bb87b8b282613ff5b2ca309f815d4a63c2c0b46
|
| 3 |
+
size 498591332
|
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f6207244d50ff097df6856e9324a7acb303127817cb6c85d52ff09fbff8a1b3a
|
| 3 |
+
size 499403292
|
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7e0328f5b705f993bd68886d7fb0f44fec9bcb7ec4da05d39ff326cc3e4fa034
|
| 3 |
+
size 498422252
|
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:83a91a984c17a761615efb222f71f019daa00d1aa0ca479d21b7c39c782d30b0
|
| 3 |
+
size 498353536
|
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a413e328551aa05ee5d4726b8845408099aaefca5a433824bad777fc4dfb4548
|
| 3 |
+
size 18012
|
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"sharded_backend": "torch_dist", "sharded_backend_version": 1, "common_backend": "torch", "common_backend_version": 1}
|
model/dev-dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
4768
|