Upload folder using huggingface_hub
Browse files- .gitattributes +17 -0
- model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata +3 -0
- model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp +3 -0
- model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp +3 -0
- model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp +3 -0
- model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp +3 -0
- model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp +3 -0
- model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp +3 -0
- model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp +3 -0
- model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp +3 -0
- model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp +3 -0
- model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp +3 -0
- model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp +3 -0
- model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp +3 -0
- model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp +3 -0
- model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp +3 -0
- model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp +3 -0
- model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp +3 -0
- model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt +3 -0
- model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json +1 -0
- model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt +1 -0
.gitattributes
CHANGED
|
@@ -423,3 +423,20 @@ model/dev-mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16
|
|
| 423 |
model/dev-mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 424 |
model/dev-mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 425 |
model/dev-mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 423 |
model/dev-mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 424 |
model/dev-mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 425 |
model/dev-mamba_moe-0.5b-q16-kv2-hybrid0.08-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 426 |
+
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata filter=lfs diff=lfs merge=lfs -text
|
| 427 |
+
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 428 |
+
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 429 |
+
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 430 |
+
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 431 |
+
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 432 |
+
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 433 |
+
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 434 |
+
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 435 |
+
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 436 |
+
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 437 |
+
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 438 |
+
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 439 |
+
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 440 |
+
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 441 |
+
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 442 |
+
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
|
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91a7066eae6f56f19891cc471240481e45534af78ba4a15002b4755ffe9450ac
|
| 3 |
+
size 923219
|
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a6c7c606cd2881dcb8b2ca22960d6dabd8bc5653ef66ba72ff910c6601378b53
|
| 3 |
+
size 545101300
|
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:916ad905fe4adbfdadcfb5e4d2198d4fad1e9848c579dedaab8f37d594189810
|
| 3 |
+
size 545132060
|
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d6b20991e2e2d906ac96320335ef6120ef6a0bca0f256fd0f7d63649af9e3f93
|
| 3 |
+
size 499379568
|
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef8a395cd9094d814b4c39250007c4989d73d03b07d34c8a3654e195e591f2ca
|
| 3 |
+
size 498618012
|
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c0ef37ab4db8ffc4bc719325dc4edd3d80fe2c042b32799c5125db2e665d407f
|
| 3 |
+
size 498643736
|
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d50cad17628f08360687777898ca47ef6f01fe410dc29535e206405363d10e4
|
| 3 |
+
size 499459932
|
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5ed5087d26de7a5122401f8a01c918deb3f0a808f8b206449eab4577a8795cfa
|
| 3 |
+
size 498478892
|
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:53327b84e2d2cc4b509f5cf61ff41413a219c4becb758aecbe1887b9fce96135
|
| 3 |
+
size 499459932
|
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eeb6dc1b762c95d34e885ab356012fb9e8c527a8bcb3cff51b97769a960f048b
|
| 3 |
+
size 498583868
|
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3a8d187aab7174b58abfd0bd84cb32d870fd6f2ba11a7c56f474ae68397472a
|
| 3 |
+
size 499403292
|
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ba6079393872e9d11f8db3778717082709aea71f18f917809667268ae188822
|
| 3 |
+
size 544206316
|
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c073f49b6f3190b678177aa0a8aa936abc5ac2302663ad9304590ac4480e366
|
| 3 |
+
size 544049468
|
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a6990da244fbb7f55c314cb0f761e0077c164daf2f59b9499a533f8ff76ead93
|
| 3 |
+
size 498583868
|
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f5e7fd4fbfbb73a8653e4b77f77264d342d935d62984bd1d12240f3a0eb672d0
|
| 3 |
+
size 499403292
|
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:df16ae18d6d5eda66e4b0516d1418344055ef6d1915447e07de0b1a4a8992891
|
| 3 |
+
size 498422252
|
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f7cde3790b3780b658ed5c36b802bedfc21fb0dad3d8fdb680d2fdc5a9d770ca
|
| 3 |
+
size 498353536
|
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:641f428a50d616b05e13a04138ca84c6b3b439adb3c564df3393b4ca25f10541
|
| 3 |
+
size 18140
|
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"sharded_backend": "torch_dist", "sharded_backend_version": 1, "common_backend": "torch", "common_backend_version": 1}
|
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-2-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
4768
|