Training in progress, step 800, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +40 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0d54e282f291a93f6267d6355da12cf995021e0eba4fbce372d3e8d20ebeef24
 size 80792096

 version https://git-lfs.github.com/spec/v1
+oid sha256:57752341747a334acff7e7c528323be22984cd223aa4991ef1082bda00f5174e
 size 80792096

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea8fea7f9df4ffe845d46f73ae0db4397236e8fde40ed896bfe8a32356e2a21d
 size 161815786

 version https://git-lfs.github.com/spec/v1
+oid sha256:84e4115ebf0a8527b2019b5f89a6a7ac019a87f4f339264b0c1e5bf3e4ecd86c
 size 161815786

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c98b6d1fbc7ac6affec22be72fb3a942a886bb0d0bc66155ec1dc925e7b19b84
 size 14180

 version https://git-lfs.github.com/spec/v1
+oid sha256:fd4adf102d00d0cae570ac4ad0c98e14f03cf5c3c9e8e538ac52fc78f0a8a58c
 size 14180

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bae4f04e8c27e8d5a131fd03799d376a4ac32337d535dd00c3175dd79c3deccf
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0f7b3a80d33ece2309bf5e8c3963ace6e331a62867cb982ffe0778787e5ad208
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0641917192682144,
   "eval_steps": 800,
-  "global_step": 600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -99,6 +99,42 @@
       "learning_rate": 3.7651019814126654e-05,
       "loss": 0.8901,
       "step": 600
     }
   ],
   "logging_steps": 50,
@@ -113,12 +149,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.1877807290151076e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.08558895902428587,
   "eval_steps": 800,
+  "global_step": 800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.7651019814126654e-05,
       "loss": 0.8901,
       "step": 600
+    },
+    {
+      "epoch": 0.06954102920723226,
+      "grad_norm": 0.4904666244983673,
+      "learning_rate": 2.181685175319702e-05,
+      "loss": 0.8441,
+      "step": 650
+    },
+    {
+      "epoch": 0.07489033914625014,
+      "grad_norm": 0.5129781365394592,
+      "learning_rate": 9.903113209758096e-06,
+      "loss": 0.8732,
+      "step": 700
+    },
+    {
+      "epoch": 0.080239649085268,
+      "grad_norm": 0.4603961706161499,
+      "learning_rate": 2.5072087818176382e-06,
+      "loss": 0.8404,
+      "step": 750
+    },
+    {
+      "epoch": 0.08558895902428587,
+      "grad_norm": 0.4574665129184723,
+      "learning_rate": 0.0,
+      "loss": 0.8934,
+      "step": 800
+    },
+    {
+      "epoch": 0.08558895902428587,
+      "eval_loss": 0.8988075256347656,
+      "eval_runtime": 1557.4072,
+      "eval_samples_per_second": 10.108,
+      "eval_steps_per_second": 1.264,
+      "step": 800
     }
   ],
   "logging_steps": 50,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.58196500264072e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null