prosecalign
/

clm7b0129-wds-0.8-kendall-onof-ofif-corr-max-2-simpo-max1500-default

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a2592f5c652af4c192a71fd852bf1bbdd9b50b567e3a3d9e748449661262ceec
 size 40036488

 version https://git-lfs.github.com/spec/v1
+oid sha256:6b7042ea7509a7ca1469921d0b9066bd22f403280574684a109ffc041ad7544e
 size 40036488

trainer_log.jsonl CHANGED Viewed

@@ -160,3 +160,9 @@
 {"current_steps": 1340, "total_steps": 1500, "loss": 0.4722, "accuracy": 0.9750000238418579, "learning_rate": 1.3905907440629752e-07, "epoch": 1.0993024210094378, "percentage": 89.33, "elapsed_time": "2:55:34", "remaining_time": "0:20:57", "throughput": "0.00", "total_tokens": 0}
 {"current_steps": 1350, "total_steps": 1500, "loss": 0.6968, "accuracy": 0.987500011920929, "learning_rate": 1.223587092621162e-07, "epoch": 1.1075092326631104, "percentage": 90.0, "elapsed_time": "2:56:45", "remaining_time": "0:19:38", "throughput": "0.00", "total_tokens": 0}
 {"current_steps": 1350, "total_steps": 1500, "eval_loss": 0.09445749968290329, "epoch": 1.1075092326631104, "percentage": 90.0, "elapsed_time": "2:57:11", "remaining_time": "0:19:41", "throughput": "0.00", "total_tokens": 0}

 {"current_steps": 1340, "total_steps": 1500, "loss": 0.4722, "accuracy": 0.9750000238418579, "learning_rate": 1.3905907440629752e-07, "epoch": 1.0993024210094378, "percentage": 89.33, "elapsed_time": "2:55:34", "remaining_time": "0:20:57", "throughput": "0.00", "total_tokens": 0}
 {"current_steps": 1350, "total_steps": 1500, "loss": 0.6968, "accuracy": 0.987500011920929, "learning_rate": 1.223587092621162e-07, "epoch": 1.1075092326631104, "percentage": 90.0, "elapsed_time": "2:56:45", "remaining_time": "0:19:38", "throughput": "0.00", "total_tokens": 0}
 {"current_steps": 1350, "total_steps": 1500, "eval_loss": 0.09445749968290329, "epoch": 1.1075092326631104, "percentage": 90.0, "elapsed_time": "2:57:11", "remaining_time": "0:19:41", "throughput": "0.00", "total_tokens": 0}
+{"current_steps": 1360, "total_steps": 1500, "loss": 0.5171, "accuracy": 1.0, "learning_rate": 1.067012561698319e-07, "epoch": 1.1157160443167828, "percentage": 90.67, "elapsed_time": "2:58:34", "remaining_time": "0:18:22", "throughput": "0.00", "total_tokens": 0}
+{"current_steps": 1370, "total_steps": 1500, "loss": 0.3295, "accuracy": 1.0, "learning_rate": 9.209358300585474e-08, "epoch": 1.1239228559704555, "percentage": 91.33, "elapsed_time": "2:59:43", "remaining_time": "0:17:03", "throughput": "0.00", "total_tokens": 0}
+{"current_steps": 1380, "total_steps": 1500, "loss": 0.371, "accuracy": 1.0, "learning_rate": 7.854209717842231e-08, "epoch": 1.132129667624128, "percentage": 92.0, "elapsed_time": "3:00:55", "remaining_time": "0:15:43", "throughput": "0.00", "total_tokens": 0}
+{"current_steps": 1390, "total_steps": 1500, "loss": 0.4569, "accuracy": 1.0, "learning_rate": 6.605274281709929e-08, "epoch": 1.1403364792778006, "percentage": 92.67, "elapsed_time": "3:02:10", "remaining_time": "0:14:25", "throughput": "0.00", "total_tokens": 0}
+{"current_steps": 1400, "total_steps": 1500, "loss": 0.4555, "accuracy": 1.0, "learning_rate": 5.463099816548578e-08, "epoch": 1.1485432909314732, "percentage": 93.33, "elapsed_time": "3:03:23", "remaining_time": "0:13:05", "throughput": "0.00", "total_tokens": 0}
+{"current_steps": 1400, "total_steps": 1500, "eval_loss": 0.09715119004249573, "epoch": 1.1485432909314732, "percentage": 93.33, "elapsed_time": "3:03:49", "remaining_time": "0:13:07", "throughput": "0.00", "total_tokens": 0}