Upload folder using huggingface_hub
Browse files- .gitattributes +17 -0
- model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata +3 -0
- model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp +3 -0
- model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp +3 -0
- model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp +3 -0
- model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp +3 -0
- model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp +3 -0
- model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp +3 -0
- model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp +3 -0
- model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp +3 -0
- model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp +3 -0
- model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp +3 -0
- model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp +3 -0
- model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp +3 -0
- model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp +3 -0
- model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp +3 -0
- model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp +3 -0
- model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp +3 -0
- model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt +3 -0
- model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json +1 -0
- model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt +1 -0
- model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/linked_runs.txt +1 -0
.gitattributes
CHANGED
|
@@ -1221,3 +1221,20 @@ model/nop-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-
|
|
| 1221 |
model/nop-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0003576/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1222 |
model/nop-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0003576/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1223 |
model/nop-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0003576/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1221 |
model/nop-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0003576/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1222 |
model/nop-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0003576/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1223 |
model/nop-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0003576/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1224 |
+
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata filter=lfs diff=lfs merge=lfs -text
|
| 1225 |
+
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1226 |
+
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1227 |
+
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1228 |
+
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1229 |
+
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1230 |
+
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1231 |
+
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1232 |
+
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1233 |
+
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1234 |
+
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1235 |
+
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1236 |
+
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1237 |
+
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1238 |
+
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1239 |
+
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1240 |
+
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
|
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:753adeb8fa01a7c65e3bd9f604edf04215f47e709f838a93692aebc13ee92631
|
| 3 |
+
size 794811
|
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c04e1bf8d441b2b441ad867a597738eef65a92644b78d5bfbfeda40cc1bdeb4f
|
| 3 |
+
size 358937366
|
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7e9eda3d4547aa8192968a28f34bfbb3f7cce29532696ec4fe9411a8e02b8a4b
|
| 3 |
+
size 359009228
|
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9df93f1caa495a8f22a9a02cea6f92a1258a54f6231fee9526f109208c70c726
|
| 3 |
+
size 358200592
|
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fd9b922d5266cbe4a0549bb4cffdbfd1b9cee9b1e770cae7ac06022e87cbfdbd
|
| 3 |
+
size 358300939
|
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5936a43934a6b8a5ceb94175ec3766efff893b138e1114255c8f2d9a0d6660b8
|
| 3 |
+
size 315532321
|
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ce31088d328417af84a59e50d9e9ae539db4dbea4c09d738e6ca50302ab01b0
|
| 3 |
+
size 315557489
|
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fd97a677457e69576a455c0a8fa729d58b37d9545a9ec83304252f7930879340
|
| 3 |
+
size 315524600
|
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:54dfaf4519646350f4e43cd4684f4a519f5bc75905e7d22a477ce226d959f33e
|
| 3 |
+
size 315548191
|
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be59eb027b56163002848d4af6b423360f00263bdf3b0c082f585f6d9b0d8c2f
|
| 3 |
+
size 315707661
|
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:040d269a682f3ef3eac484e4a4607c5a6119c780fd5d3f65eba11fbbba40b5a7
|
| 3 |
+
size 315269084
|
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1af7293aa3b37e0a29bd2efcd74fe6d66f5ee5a5aa71e7e8146db47277d413a5
|
| 3 |
+
size 315707661
|
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b575ba7a28da4445cbfef86b1e9597d1291de4ad9b61d3f10257c0c9c33e6136
|
| 3 |
+
size 315138544
|
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:867bb1af2fd0077f41afbd277091992c29a2a2c3a70a4f795e36cbb3a0fd37c8
|
| 3 |
+
size 315707149
|
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:59cf5d0a3c3c47b6202fde84346ae97f0aeeb40503f893590d88d04a83b9cc90
|
| 3 |
+
size 315288136
|
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:83adf86236cc24364b587ff6da9c712feeab669d7f484a7b993743533eb46f3e
|
| 3 |
+
size 315707149
|
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:29c856083c3401567e8533f83993170c56c35922a3e2e69e37187cb1fbc71bd0
|
| 3 |
+
size 315155635
|
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:824e58f058ec44b93757a41d9a7c0a42b3588d8407b639b73eed46d2e2dca9fe
|
| 3 |
+
size 18983
|
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"sharded_backend": "torch_dist", "sharded_backend_version": 1, "common_backend": "torch", "common_backend_version": 1}
|
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
4768
|
model/nop-swa512mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/linked_runs.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
2025.06.01-03.14.32
|