Training in progress, step 300, checkpoint

Files changed (12) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b574796a04bdd5ba75f557b1e3a82835e37b6cbdc5653fbe0523f7ca548452fd
 size 2066752

 version https://git-lfs.github.com/spec/v1
+oid sha256:ae2287b45658d1cae69b4bfe25e777f022036e8d980148fa7635d454487588a9
 size 2066752

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e060cb2727f49ca568161340577be6a7238338c1d2ddeb535174896912bad9a8
 size 2162798

 version https://git-lfs.github.com/spec/v1
+oid sha256:45076cd7d7f407afe1d46ae5fb6859b5620dc1fca8a858a0851ce56bd11f8878
 size 2162798

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e165ab94e4c39d599db6d95416fe6b20ee12e2262e929ee099151db7ce619380
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:4faa065a55913b65f4b0549e4d93d87e8865c0f6ec216f40a3de4d251a15322a
 size 15984

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f9cbc25705f026b6aace4fa255cf880974da196014e31a032948b334d2ca7867
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:10ae8864af9d168bc9a94e5c5625da874d35a133304d7d7414b10c80148467d4
 size 15984

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:80e9bf66df8c1928f5af3bf47317ecd1b0184ef7e9f158d6f951cc904ae9cbf3
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:ece1ed46b8aa193251efdc1d8393b3bb872b53f6ba93c31cc3efc627b34d74be
 size 15984

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fcd1ed78c2e5a24c53b901fdcd13d9f77e900b35f51f226710a851a3df6e162f
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:3018d94a8b9b3b95a3578032d80b8d3f31c01fab9a615c48039128422aba13ef
 size 15984

last-checkpoint/rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea83adac914f4574ef740ff11ec2d9bdb09c46e3947a15a096778da1586b49ee
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:d53200af24bc9fb65fd10ddaa327aeda1804119510e1de3d0cbe9297bfbcade4
 size 15984

last-checkpoint/rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b13ecf7497db0407aadb3d8df50c07202876652d5684788d8266bbef04783351
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:86c1416ffb6786e15b9550c111cad94cc2bc7bb18dea5ef0cdef3475e2015ab2
 size 15984

last-checkpoint/rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a48ddde4793935f0588ab76c54160e3cba7ef48fcd34c2871252358b28649d52
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:47857ef45555e1fc8f192b42aa88a26a9f993ff97303b3c590fa1e726221a814
 size 15984

last-checkpoint/rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:99eb96a4a039cb5447a1119d95efed52c77ed0a4b17aaf96b194ac2390ed4870
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:1912f7357d97d0c536c21d15f5ae8d134b73c487be1987df761ea730336d8216
 size 15984

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0985d0257c892696fb7285dad69becde2a1197c31dbd94a987186cfae751de11
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:74485e67705dc36efbfb69b1e54f842e1ff07894d01bb0e36d6d2526a318b300
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 93.33333333333333,
   "eval_steps": 200,
-  "global_step": 280,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -219,6 +219,20 @@
       "learning_rate": 2.1852399266194314e-06,
       "loss": 9.6595,
       "step": 280
     }
   ],
   "logging_steps": 10,
@@ -233,12 +247,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 458257575444480.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 100.0,
   "eval_steps": 200,
+  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 2.1852399266194314e-06,
       "loss": 9.6595,
       "step": 280
+    },
+    {
+      "epoch": 96.66666666666667,
+      "grad_norm": 0.3671875,
+      "learning_rate": 5.478104631726711e-07,
+      "loss": 9.659,
+      "step": 290
+    },
+    {
+      "epoch": 100.0,
+      "grad_norm": 0.36328125,
+      "learning_rate": 0.0,
+      "loss": 9.6596,
+      "step": 300
     }
   ],
   "logging_steps": 10,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 490990259404800.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null