Training in progress, step 10000
Browse files
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
checkpoint-*/
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1527847357
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f26a40649bc9647fe8a0e325ad5fb3c39bb0b0c253740e1f63871b798533a6c3
|
3 |
size 1527847357
|
run.sh
CHANGED
@@ -15,8 +15,8 @@ deepspeed run-ba.py \
|
|
15 |
--torch_compile_mode="max-autotune" \
|
16 |
--logging_steps="25" \
|
17 |
--learning_rate="3e-6" \
|
18 |
-
--max_steps="
|
19 |
-
--resume_from_checkpoint="checkpoint-
|
20 |
--output_dir="./" \
|
21 |
--per_device_train_batch_size="32" \
|
22 |
--gradient_accumulation_steps="1" \
|
|
|
15 |
--torch_compile_mode="max-autotune" \
|
16 |
--logging_steps="25" \
|
17 |
--learning_rate="3e-6" \
|
18 |
+
--max_steps="10000" \
|
19 |
+
--resume_from_checkpoint="checkpoint-9000" \
|
20 |
--output_dir="./" \
|
21 |
--per_device_train_batch_size="32" \
|
22 |
--gradient_accumulation_steps="1" \
|
runs/Dec23_07-12-50_129-146-176-120/1671779621.1881087/events.out.tfevents.1671779621.129-146-176-120.858176.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6f2d97c1e6ff8cb3ae406e45145fc8c58c49aaaf040f49725f1ef78e45b7367b
|
3 |
+
size 5904
|
runs/Dec23_07-12-50_129-146-176-120/events.out.tfevents.1671779621.129-146-176-120.858176.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1aae6df4f829c8a91a16ff0205d9139991a6d32e0c426eefca6a23a4fdcf5526
|
3 |
+
size 10914
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4795
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d88fa534f2fd355b8f3b60e89464586ee3bf259e74d71bb35e6ba447a46b16c
|
3 |
size 4795
|