Training in progress, step 300, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +152 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ef787759055fc8abbcb0795d27912d1caf6e8352cc8a90ff7655370737654753
 size 194563400

 version https://git-lfs.github.com/spec/v1
+oid sha256:d6de441aaa8485c64b502422cd0a7f66e324dc19f216e281253db3e91c440d3a
 size 194563400

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f19001108fe67f99004888f0674204d62a444c1b5aad304d6f1009de7f9fc240
 size 99236212

 version https://git-lfs.github.com/spec/v1
+oid sha256:3e36b77114a8d774d669476f30b11995ffb7598179f55e979f3316e32cb2b971
 size 99236212

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4d2325dabb3409fc885cf4d67afa405d2e5a220357403ca3239ea16d54563f49
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:0b674501ac52ce92a0082a78aa20edda450504487603ac5f2a1dcf66f24ee75e
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa1e27bc8c4f6272ee858bf97369b9e68f7265e3e9a72207bbd5098643e86719
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0d93420319c4318ff13366855f16b6ec61d99b866bdf2a20293a1621b040b36f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.2404594493049219,
   "eval_steps": 20,
-  "global_step": 280,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2087,6 +2087,154 @@
       "eval_samples_per_second": 7.953,
       "eval_steps_per_second": 7.953,
       "step": 280
     }
   ],
   "logging_steps": 1,
@@ -2101,12 +2249,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.946242191130624e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.25763512425527346,
   "eval_steps": 20,
+  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 7.953,
       "eval_steps_per_second": 7.953,
       "step": 280
+    },
+    {
+      "epoch": 0.2413182330524395,
+      "grad_norm": 2.2641775608062744,
+      "learning_rate": 3.166200546578718e-06,
+      "loss": 1.4545,
+      "step": 281
+    },
+    {
+      "epoch": 0.24217701679995707,
+      "grad_norm": 1.9874449968338013,
+      "learning_rate": 2.8427160470641253e-06,
+      "loss": 1.5352,
+      "step": 282
+    },
+    {
+      "epoch": 0.24303580054747465,
+      "grad_norm": 1.7880980968475342,
+      "learning_rate": 2.5365011072835117e-06,
+      "loss": 1.8229,
+      "step": 283
+    },
+    {
+      "epoch": 0.24389458429499222,
+      "grad_norm": 1.6125706434249878,
+      "learning_rate": 2.2475916629177415e-06,
+      "loss": 1.3356,
+      "step": 284
+    },
+    {
+      "epoch": 0.2447533680425098,
+      "grad_norm": 2.2507503032684326,
+      "learning_rate": 1.9760216187710787e-06,
+      "loss": 1.6077,
+      "step": 285
+    },
+    {
+      "epoch": 0.24561215179002738,
+      "grad_norm": 1.8500958681106567,
+      "learning_rate": 1.7218228447922867e-06,
+      "loss": 1.742,
+      "step": 286
+    },
+    {
+      "epoch": 0.24647093553754496,
+      "grad_norm": 2.065246105194092,
+      "learning_rate": 1.4850251723345196e-06,
+      "loss": 1.575,
+      "step": 287
+    },
+    {
+      "epoch": 0.24732971928506253,
+      "grad_norm": 1.8860384225845337,
+      "learning_rate": 1.2656563906545902e-06,
+      "loss": 1.6296,
+      "step": 288
+    },
+    {
+      "epoch": 0.2481885030325801,
+      "grad_norm": 1.577683448791504,
+      "learning_rate": 1.0637422436516274e-06,
+      "loss": 1.5374,
+      "step": 289
+    },
+    {
+      "epoch": 0.2490472867800977,
+      "grad_norm": 1.9685717821121216,
+      "learning_rate": 8.793064268460604e-07,
+      "loss": 1.6663,
+      "step": 290
+    },
+    {
+      "epoch": 0.24990607052761526,
+      "grad_norm": 2.094910144805908,
+      "learning_rate": 7.123705845987093e-07,
+      "loss": 1.7775,
+      "step": 291
+    },
+    {
+      "epoch": 0.25076485427513284,
+      "grad_norm": 1.6965657472610474,
+      "learning_rate": 5.629543075708176e-07,
+      "loss": 1.4766,
+      "step": 292
+    },
+    {
+      "epoch": 0.2516236380226504,
+      "grad_norm": 1.9982954263687134,
+      "learning_rate": 4.310751304249738e-07,
+      "loss": 1.8619,
+      "step": 293
+    },
+    {
+      "epoch": 0.252482421770168,
+      "grad_norm": 1.7990736961364746,
+      "learning_rate": 3.167485297673411e-07,
+      "loss": 1.5422,
+      "step": 294
+    },
+    {
+      "epoch": 0.2533412055176856,
+      "grad_norm": 2.147096633911133,
+      "learning_rate": 2.1998792233142714e-07,
+      "loss": 1.7929,
+      "step": 295
+    },
+    {
+      "epoch": 0.25419998926520315,
+      "grad_norm": 2.139371871948242,
+      "learning_rate": 1.4080466340349316e-07,
+      "loss": 1.4746,
+      "step": 296
+    },
+    {
+      "epoch": 0.25505877301272073,
+      "grad_norm": 2.0672645568847656,
+      "learning_rate": 7.92080454900701e-08,
+      "loss": 1.8796,
+      "step": 297
+    },
+    {
+      "epoch": 0.2559175567602383,
+      "grad_norm": 1.8566962480545044,
+      "learning_rate": 3.5205297227380855e-08,
+      "loss": 1.5411,
+      "step": 298
+    },
+    {
+      "epoch": 0.2567763405077559,
+      "grad_norm": 2.228825092315674,
+      "learning_rate": 8.801582533035644e-09,
+      "loss": 1.3654,
+      "step": 299
+    },
+    {
+      "epoch": 0.25763512425527346,
+      "grad_norm": 1.640992522239685,
+      "learning_rate": 0.0,
+      "loss": 1.6715,
+      "step": 300
+    },
+    {
+      "epoch": 0.25763512425527346,
+      "eval_loss": 1.7131195068359375,
+      "eval_runtime": 47.8512,
+      "eval_samples_per_second": 7.962,
+      "eval_steps_per_second": 7.962,
+      "step": 300
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4.22811663335424e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null