Upload folder using huggingface_hub
Browse files- .gitattributes +17 -0
- model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata +3 -0
- model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp +3 -0
- model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp +3 -0
- model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp +3 -0
- model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp +3 -0
- model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp +3 -0
- model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp +3 -0
- model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp +3 -0
- model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp +3 -0
- model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp +3 -0
- model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp +3 -0
- model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp +3 -0
- model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp +3 -0
- model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp +3 -0
- model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp +3 -0
- model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp +3 -0
- model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp +3 -0
- model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt +3 -0
- model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json +1 -0
- model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt +1 -0
- model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/linked_runs.txt +3 -0
.gitattributes
CHANGED
|
@@ -236,3 +236,20 @@ model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16
|
|
| 236 |
model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 237 |
model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 238 |
model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 237 |
model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 238 |
model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 239 |
+
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata filter=lfs diff=lfs merge=lfs -text
|
| 240 |
+
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 241 |
+
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 242 |
+
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 243 |
+
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 244 |
+
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 245 |
+
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 246 |
+
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 247 |
+
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 248 |
+
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 249 |
+
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 250 |
+
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 251 |
+
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 252 |
+
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 253 |
+
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 254 |
+
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 255 |
+
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
|
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e44efca492f1445c65553ffcb81d511e2a52121994dfb57ebcb17c6197d97f3a
|
| 3 |
+
size 928129
|
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb857b3869b6d0a266d7e637d8fbe8e527d56eaad15125b628dc46b55ccd0959
|
| 3 |
+
size 549505455
|
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fdb513423a98f7935e878ba19ed3ec9f37456adf4519168ac7bfc9d4f8ac8bde
|
| 3 |
+
size 549551710
|
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ecc41372b7deb8032cec7680e9492e4ea2f9da9d8e8397de319c8b19b3874c08
|
| 3 |
+
size 503495673
|
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:11eef5ad6b4e8dbebb678460503a983e43f43d605a80843220103180f599ceec
|
| 3 |
+
size 502734309
|
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc0c916575ff944473fa40be40a5942331cdb49983aaedb2167af9059e0d8028
|
| 3 |
+
size 502745370
|
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:33dd736307604c7e7907c817922aab825591c249e86aa69965924f19ebcafe3b
|
| 3 |
+
size 503565689
|
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:55ee6ec459a07865b315c2ab81e319fdc2e5e11a59f12362d90cee5453f58a0c
|
| 3 |
+
size 502575365
|
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:73f72480c5a602c4370849d39f628fb78e5231f037d8ed7b399eb85b06a9237d
|
| 3 |
+
size 503565689
|
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:88ec4fb8df81b70ff40c9434d2745f968192b28f20e9927d7609edc52010c0a0
|
| 3 |
+
size 498660791
|
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e81a5a032ab0123c98bbd7e64e8ab541f9b8413db157b4a593f858a798ee8848
|
| 3 |
+
size 499474889
|
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4217074e36ea926e06b000ef90813766f35068cf61e5f919b2c5d99febd484c9
|
| 3 |
+
size 544268949
|
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed2cc67fb2d8a77a0700f2b6da59b8dbfef60787ff77e5f48162e415ed731c78
|
| 3 |
+
size 544117569
|
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b79b8d77e0ba3259a638b17c2c0e657e85b89d2f54a30aa9af89260b888c0690
|
| 3 |
+
size 498660791
|
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:77170e722d26ee9b4ada9a143c482760dcfe8176f0bd5b05f39bef207143bf89
|
| 3 |
+
size 499474889
|
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f36fbb270918ffc002bd6e7d3dafbc841e076347c74c26e272dff04c8c59c71
|
| 3 |
+
size 498484565
|
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:245a4b473bc12497afc24191b0525da0fed58ee48d9b26d522293ddc97d9823d
|
| 3 |
+
size 498424736
|
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6668a260912840dc71eb0f1ca03dfb48cbe78276cee63e8cd2550f0a7b4a8036
|
| 3 |
+
size 18535
|
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"sharded_backend": "torch_dist", "sharded_backend_version": 1, "common_backend": "torch", "common_backend_version": 1}
|
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
4768
|
model/dev-fp8-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/linked_runs.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025.05.05-16.40.17
|
| 2 |
+
2025.05.06-12.57.29
|
| 3 |
+
2025.05.07-12.12.26
|