Upload folder using huggingface_hub
Browse files- .gitattributes +17 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt +3 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json +1 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt +1 -0
- model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/linked_runs.txt +1 -0
.gitattributes
CHANGED
|
@@ -185,3 +185,20 @@ model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-
|
|
| 185 |
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 186 |
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 187 |
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 186 |
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 187 |
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 188 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata filter=lfs diff=lfs merge=lfs -text
|
| 189 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 190 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 191 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 192 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 193 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 194 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 195 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 196 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 197 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 198 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 199 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 200 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 201 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 202 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 203 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 204 |
+
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
|
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1db63f3831a187d36d6ec4f72fb5320f916d0d085e77549b23c63ccd84bf8368
|
| 3 |
+
size 1689784
|
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f811f65da379b9d1b3a6abbdb6df730d47219b3e787609c1fec7939f3e10d025
|
| 3 |
+
size 545834602
|
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:772c9fba92187c9ebfd823058e28b829dafb972e8e59b882d65bd42b620321db
|
| 3 |
+
size 545865192
|
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b317179451fe24b75c549d690434927d04c6bb472cfd78ccca85482f2b10de6f
|
| 3 |
+
size 544081948
|
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a1c751b6c0df2f5b5bf495e823d2f1fcdafcd8924e07896b149ad2dd1661eae3
|
| 3 |
+
size 544703761
|
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:943c0ebc8cc16958fc81dbe39060311e5904ac9126e5635fd44eea3928f37675
|
| 3 |
+
size 499544558
|
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c879de6c600f3217f4c6d53c49284ad37638ded219bd8253a6f7c2ea95afa0b
|
| 3 |
+
size 500071658
|
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:28ac4525d1eb2b9309dc785749e34cb4e87b0f759c390a90ef2505b4a4191967
|
| 3 |
+
size 499243481
|
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d28136ba00d058a694757aeb7d7011d6c38877e6dcda751704cf8a8e8b277a5b
|
| 3 |
+
size 499021505
|
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d3b5a54a105a1dd3e69d86dc35e1ded5669b8fb3afa3f602e48da464b47221b8
|
| 3 |
+
size 499399771
|
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2f31130a06b97a565f9b889a0b101851aa476c0438776146662003fba5b4d314
|
| 3 |
+
size 499920266
|
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:abd7c5612f028207a87aafc615b029e8b5b6a5c9d5e61cf72f9e7aaba7a81873
|
| 3 |
+
size 498828368
|
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:340f12200ebd81dfc26863ccaf019d863edfa6e8cb5c1f01c469db8671e570c3
|
| 3 |
+
size 499132257
|
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c0489c0c5d26bbdffa37300a8dc94819bcddee6b00cf8f1fce9931286d3c0105
|
| 3 |
+
size 499399771
|
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:48a9c1b59ad055dc99e0fbd698e832345a08c79046f945b8c50df9afcc1b439b
|
| 3 |
+
size 499920266
|
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c10b53d334a2f594df2758c5067d4d20bb55f4ebbfc20d361ae421fb0f332f8
|
| 3 |
+
size 499092089
|
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:26cba09edca7310c2e16225dfd94bea68231aea491bf8f89d696dea88efd2ddd
|
| 3 |
+
size 499658122
|
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:937eb1d37ad8b82c0459458b4b1f1c7c973b45afbecd0264ab8ff16a60221933
|
| 3 |
+
size 18791
|
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"sharded_backend": "torch_dist", "sharded_backend_version": 1, "common_backend": "torch", "common_backend_version": 1}
|
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
4768
|
model/dev-dsv3-0.5b-q16-kv2-ep-32-sep-0-top4-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/linked_runs.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
2025.05.20-01.05.33
|