Training in progress, step 260, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +2 -2
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +73 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ff29c933a4daa7713f522963be2552c9a83673e6fd2520f8b71038da53fd571c
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:2b75a0248b725001d4b737202cee6a588454275e75688b8e92b0209cf50f24e3
 size 167832240

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d473e2521456a932c3a8bb5ef31bc35a268ac5a17cdd188761fe0e6562831150
-size 85723284

 version https://git-lfs.github.com/spec/v1
+oid sha256:25de9e1655df64ff1b16783b4ac59b40894a9b461138ea8b6756424d9a691538
+size 85723732

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e80491df64e0b9dcefd07abe6128b6730507af86d32592d0d460e696bde58ca2
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:3f43da92b779de0b28d2e2154fe90a421bc3b606235826492ff897080fccad57
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b624ad5498b7e0d2d006e00677be92437acefdd1a52a75ba879cb0aa4c84e989
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:81c68d24d7db1be76d9250e1c398a314ef42ae8cead5a2f3e0a2eedee6a3ff25
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:52e750e93299ff7110c9d9215f366603bb8954578c837fc22b7489b8b2c487f0
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:3da237555441ba12bc3bc95ed870f2781bb439000b7e27bc6dc4e54c5877136f
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6d6d759d06e1fc36a3b0fb326b3e5dd3c3ec13a446a5ad06234c7e5737edb1f5
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:052a2e4cc9bb53aeaf3d416afc45769a53ef735852debe2282fc1430de68ba46
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:98e9d7325633d70b723b343ee3f62a4930c7e185a70840fb60ffc5f171ed44ed
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b2707c2e7e0bedcedb09182ad986f1fcf600ab66b24375e84af16e5408d76950
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.564652738565782,
   "eval_steps": 222,
-  "global_step": 250,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1773,6 +1773,76 @@
       "learning_rate": 4.4874455403137514e-05,
       "loss": 1.7377,
       "step": 250
     }
   ],
   "logging_steps": 1,
@@ -1792,7 +1862,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.483774567120896e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.5872388481084133,
   "eval_steps": 222,
+  "global_step": 260,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.4874455403137514e-05,
       "loss": 1.7377,
       "step": 250
+    },
+    {
+      "epoch": 0.5669113495200452,
+      "grad_norm": 1.191891074180603,
+      "learning_rate": 4.449627291255184e-05,
+      "loss": 1.6349,
+      "step": 251
+    },
+    {
+      "epoch": 0.5691699604743083,
+      "grad_norm": 0.5702697038650513,
+      "learning_rate": 4.411840888163449e-05,
+      "loss": 1.3089,
+      "step": 252
+    },
+    {
+      "epoch": 0.5714285714285714,
+      "grad_norm": 0.4630747139453888,
+      "learning_rate": 4.3740885174560736e-05,
+      "loss": 1.4976,
+      "step": 253
+    },
+    {
+      "epoch": 0.5736871823828346,
+      "grad_norm": 0.6172465682029724,
+      "learning_rate": 4.336372363581391e-05,
+      "loss": 1.4348,
+      "step": 254
+    },
+    {
+      "epoch": 0.5759457933370977,
+      "grad_norm": 0.6135768294334412,
+      "learning_rate": 4.298694608892134e-05,
+      "loss": 1.7773,
+      "step": 255
+    },
+    {
+      "epoch": 0.5782044042913608,
+      "grad_norm": 1.1833115816116333,
+      "learning_rate": 4.2610574335191615e-05,
+      "loss": 1.427,
+      "step": 256
+    },
+    {
+      "epoch": 0.5804630152456239,
+      "grad_norm": 0.8085864186286926,
+      "learning_rate": 4.2234630152453116e-05,
+      "loss": 1.3841,
+      "step": 257
+    },
+    {
+      "epoch": 0.5827216261998871,
+      "grad_norm": 0.5345907807350159,
+      "learning_rate": 4.185913529379381e-05,
+      "loss": 1.1692,
+      "step": 258
+    },
+    {
+      "epoch": 0.5849802371541502,
+      "grad_norm": 0.7415188550949097,
+      "learning_rate": 4.1484111486302704e-05,
+      "loss": 1.2728,
+      "step": 259
+    },
+    {
+      "epoch": 0.5872388481084133,
+      "grad_norm": 0.5333618521690369,
+      "learning_rate": 4.110958042981255e-05,
+      "loss": 1.4616,
+      "step": 260
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.5431255498057318e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null