Training in progress, step 10000

Files changed (6) hide show

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ checkpoint-*/

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:292ed5692b25c5ef882949ab1088ccf2a62e2935100cd3609d1745d6a0afdc10
 size 1527847357

 version https://git-lfs.github.com/spec/v1
+oid sha256:f26a40649bc9647fe8a0e325ad5fb3c39bb0b0c253740e1f63871b798533a6c3
 size 1527847357

run.sh CHANGED Viewed

@@ -15,8 +15,8 @@ deepspeed run-ba.py \
 	--torch_compile_mode="max-autotune" \
 	--logging_steps="25" \
 	--learning_rate="3e-6" \
-	--max_steps="9000" \
-	--resume_from_checkpoint="checkpoint-2000" \
 	--output_dir="./" \
 	--per_device_train_batch_size="32" \
 	--gradient_accumulation_steps="1" \

 	--torch_compile_mode="max-autotune" \
 	--logging_steps="25" \
 	--learning_rate="3e-6" \
+	--max_steps="10000" \
+	--resume_from_checkpoint="checkpoint-9000" \
 	--output_dir="./" \
 	--per_device_train_batch_size="32" \
 	--gradient_accumulation_steps="1" \

runs/Dec23_07-12-50_129-146-176-120/1671779621.1881087/events.out.tfevents.1671779621.129-146-176-120.858176.1 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:6f2d97c1e6ff8cb3ae406e45145fc8c58c49aaaf040f49725f1ef78e45b7367b
+size 5904

runs/Dec23_07-12-50_129-146-176-120/events.out.tfevents.1671779621.129-146-176-120.858176.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:1aae6df4f829c8a91a16ff0205d9139991a6d32e0c426eefca6a23a4fdcf5526
+size 10914

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:405e2f7c140f1d634318230fb26d9b2a3efb9f11f42a90f24dfd6e792f539418
 size 4795

 version https://git-lfs.github.com/spec/v1
+oid sha256:6d88fa534f2fd355b8f3b60e89464586ee3bf259e74d71bb35e6ba447a46b16c
 size 4795