Training in progress, step 100, checkpoint

Browse files

Files changed (9) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +2 -2
last-checkpoint/rng_state_1.pth +2 -2
last-checkpoint/rng_state_2.pth +2 -2
last-checkpoint/rng_state_3.pth +2 -2
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +49 -197
last-checkpoint/training_args.bin +1 -1

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae2287b45658d1cae69b4bfe25e777f022036e8d980148fa7635d454487588a9
 size 2066752

 version https://git-lfs.github.com/spec/v1
+oid sha256:25a257c5b2e34eda80e3d84f1a8cc4247ba163d63b057609915a20d5c03487fe
 size 2066752

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:556e1830f3d5b4cc26513099e09450196ba4a64ae97c75af17431421d368e625
 size 2162798

 version https://git-lfs.github.com/spec/v1
+oid sha256:c54a820f98fa07f24b707a5b13ad41e602ccc916761791610cd651856882df16
 size 2162798

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4faa065a55913b65f4b0549e4d93d87e8865c0f6ec216f40a3de4d251a15322a
-size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:a4651e32e118f1ea1a8e26dfbbe64298593e12e6a71bcd36cb77f04f86d3f86d
+size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:10ae8864af9d168bc9a94e5c5625da874d35a133304d7d7414b10c80148467d4
-size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:f0c1eba909fbb51daca773a25c075f182b4096aff21c9b4ff19dbada2080ac99
+size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ece1ed46b8aa193251efdc1d8393b3bb872b53f6ba93c31cc3efc627b34d74be
-size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:28adb9f06e220aefdc723ea4380a84d42b8bfb87cc53ce65859d55ce1876f51c
+size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3018d94a8b9b3b95a3578032d80b8d3f31c01fab9a615c48039128422aba13ef
-size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:423c49ed521f6986d20d8b29112b383f4b0f3f2e228084ef82c2ad7dcd5d1de8
+size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:74485e67705dc36efbfb69b1e54f842e1ff07894d01bb0e36d6d2526a318b300
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3a60c7d771c1fd156acee762fba03c724cb41829a3f71df370ecd1d20b134982
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,244 +1,96 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 100.0,
   "eval_steps": 200,
-  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.3333333333333333,
-      "eval_loss": 10.375551223754883,
-      "eval_runtime": 1.0009,
-      "eval_samples_per_second": 1499.719,
-      "eval_steps_per_second": 5.995,
       "step": 1
     },
     {
-      "epoch": 3.3333333333333335,
-      "grad_norm": 0.1015625,
-      "learning_rate": 0.00019945218953682734,
-      "loss": 10.3718,
       "step": 10
     },
     {
-      "epoch": 6.666666666666667,
-      "grad_norm": 0.162109375,
-      "learning_rate": 0.00019781476007338058,
-      "loss": 10.3553,
       "step": 20
     },
     {
-      "epoch": 10.0,
-      "grad_norm": 0.291015625,
-      "learning_rate": 0.00019510565162951537,
-      "loss": 10.3251,
       "step": 30
     },
     {
-      "epoch": 13.333333333333334,
-      "grad_norm": 0.330078125,
-      "learning_rate": 0.0001913545457642601,
-      "loss": 10.2723,
       "step": 40
     },
     {
-      "epoch": 16.666666666666668,
-      "grad_norm": 0.328125,
-      "learning_rate": 0.00018660254037844388,
-      "loss": 10.2096,
       "step": 50
     },
     {
-      "epoch": 20.0,
-      "grad_norm": 0.328125,
-      "learning_rate": 0.00018090169943749476,
-      "loss": 10.1499,
       "step": 60
     },
     {
-      "epoch": 23.333333333333332,
       "grad_norm": 0.326171875,
-      "learning_rate": 0.00017431448254773944,
-      "loss": 10.0935,
       "step": 70
     },
     {
-      "epoch": 26.666666666666668,
-      "grad_norm": 0.333984375,
-      "learning_rate": 0.00016691306063588583,
-      "loss": 10.0398,
       "step": 80
     },
     {
-      "epoch": 30.0,
-      "grad_norm": 0.337890625,
-      "learning_rate": 0.00015877852522924732,
-      "loss": 9.9895,
       "step": 90
     },
     {
-      "epoch": 33.333333333333336,
-      "grad_norm": 0.341796875,
-      "learning_rate": 0.00015000000000000001,
-      "loss": 9.9424,
-      "step": 100
-    },
-    {
-      "epoch": 36.666666666666664,
-      "grad_norm": 0.34375,
-      "learning_rate": 0.00014067366430758004,
-      "loss": 9.8995,
-      "step": 110
-    },
-    {
-      "epoch": 40.0,
-      "grad_norm": 0.345703125,
-      "learning_rate": 0.00013090169943749476,
-      "loss": 9.859,
-      "step": 120
-    },
-    {
-      "epoch": 43.333333333333336,
-      "grad_norm": 0.34765625,
-      "learning_rate": 0.00012079116908177593,
-      "loss": 9.8216,
-      "step": 130
-    },
-    {
-      "epoch": 46.666666666666664,
-      "grad_norm": 0.3515625,
-      "learning_rate": 0.00011045284632676536,
-      "loss": 9.7872,
-      "step": 140
-    },
-    {
-      "epoch": 50.0,
-      "grad_norm": 0.357421875,
-      "learning_rate": 0.0001,
-      "loss": 9.7569,
-      "step": 150
-    },
-    {
-      "epoch": 53.333333333333336,
-      "grad_norm": 0.35546875,
-      "learning_rate": 8.954715367323468e-05,
-      "loss": 9.7325,
-      "step": 160
-    },
-    {
-      "epoch": 56.666666666666664,
-      "grad_norm": 0.359375,
-      "learning_rate": 7.920883091822408e-05,
-      "loss": 9.712,
-      "step": 170
-    },
-    {
-      "epoch": 60.0,
-      "grad_norm": 0.359375,
-      "learning_rate": 6.909830056250527e-05,
-      "loss": 9.697,
-      "step": 180
-    },
-    {
-      "epoch": 63.333333333333336,
-      "grad_norm": 0.361328125,
-      "learning_rate": 5.9326335692419995e-05,
-      "loss": 9.6841,
-      "step": 190
-    },
-    {
-      "epoch": 66.66666666666667,
-      "grad_norm": 0.361328125,
-      "learning_rate": 5.000000000000002e-05,
-      "loss": 9.6746,
-      "step": 200
-    },
-    {
-      "epoch": 66.66666666666667,
-      "eval_loss": 9.681697845458984,
-      "eval_runtime": 0.9613,
-      "eval_samples_per_second": 1561.466,
-      "eval_steps_per_second": 6.242,
-      "step": 200
-    },
-    {
-      "epoch": 70.0,
-      "grad_norm": 0.36328125,
-      "learning_rate": 4.12214747707527e-05,
-      "loss": 9.6678,
-      "step": 210
-    },
-    {
-      "epoch": 73.33333333333333,
-      "grad_norm": 0.36328125,
-      "learning_rate": 3.308693936411421e-05,
-      "loss": 9.6641,
-      "step": 220
-    },
-    {
-      "epoch": 76.66666666666667,
-      "grad_norm": 0.36328125,
-      "learning_rate": 2.5685517452260567e-05,
-      "loss": 9.6616,
-      "step": 230
-    },
-    {
-      "epoch": 80.0,
-      "grad_norm": 0.36328125,
-      "learning_rate": 1.9098300562505266e-05,
-      "loss": 9.6605,
-      "step": 240
-    },
-    {
-      "epoch": 83.33333333333333,
-      "grad_norm": 0.365234375,
-      "learning_rate": 1.339745962155613e-05,
-      "loss": 9.6596,
-      "step": 250
-    },
-    {
-      "epoch": 86.66666666666667,
-      "grad_norm": 0.36328125,
-      "learning_rate": 8.645454235739903e-06,
-      "loss": 9.6597,
-      "step": 260
-    },
-    {
-      "epoch": 90.0,
-      "grad_norm": 0.36328125,
-      "learning_rate": 4.8943483704846475e-06,
-      "loss": 9.6595,
-      "step": 270
-    },
-    {
-      "epoch": 93.33333333333333,
-      "grad_norm": 0.361328125,
-      "learning_rate": 2.1852399266194314e-06,
-      "loss": 9.6595,
-      "step": 280
-    },
-    {
-      "epoch": 96.66666666666667,
-      "grad_norm": 0.3671875,
-      "learning_rate": 5.478104631726711e-07,
-      "loss": 9.659,
-      "step": 290
-    },
-    {
-      "epoch": 100.0,
-      "grad_norm": 0.36328125,
       "learning_rate": 0.0,
-      "loss": 9.6596,
-      "step": 300
     }
   ],
   "logging_steps": 10,
-  "max_steps": 300,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 100,
   "save_steps": 200,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -252,7 +104,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 490990259404800.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 16.666666666666668,
   "eval_steps": 200,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.16666666666666666,
+      "eval_loss": 10.376375198364258,
+      "eval_runtime": 2.3455,
+      "eval_samples_per_second": 639.941,
+      "eval_steps_per_second": 5.116,
       "step": 1
     },
     {
+      "epoch": 1.6666666666666665,
+      "grad_norm": 0.09375,
+      "learning_rate": 0.00019863613034027224,
+      "loss": 10.3756,
       "step": 10
     },
     {
+      "epoch": 3.3333333333333335,
+      "grad_norm": 0.12353515625,
+      "learning_rate": 0.0001879473751206489,
+      "loss": 10.3632,
       "step": 20
     },
     {
+      "epoch": 5.0,
+      "grad_norm": 0.2236328125,
+      "learning_rate": 0.00016772815716257412,
+      "loss": 10.3433,
       "step": 30
     },
     {
+      "epoch": 6.666666666666667,
+      "grad_norm": 0.3203125,
+      "learning_rate": 0.00014016954246529696,
+      "loss": 10.3073,
       "step": 40
     },
     {
+      "epoch": 8.333333333333334,
+      "grad_norm": 0.330078125,
+      "learning_rate": 0.00010825793454723325,
+      "loss": 10.2602,
       "step": 50
     },
     {
+      "epoch": 10.0,
+      "grad_norm": 0.326171875,
+      "learning_rate": 7.54514512859201e-05,
+      "loss": 10.2203,
       "step": 60
     },
     {
+      "epoch": 11.666666666666666,
       "grad_norm": 0.326171875,
+      "learning_rate": 4.530518418775733e-05,
+      "loss": 10.1945,
       "step": 70
     },
     {
+      "epoch": 13.333333333333334,
+      "grad_norm": 0.328125,
+      "learning_rate": 2.1085949060360654e-05,
+      "loss": 10.1812,
       "step": 80
     },
     {
+      "epoch": 15.0,
+      "grad_norm": 0.328125,
+      "learning_rate": 5.418275829936537e-06,
+      "loss": 10.1773,
       "step": 90
     },
     {
+      "epoch": 16.666666666666668,
+      "grad_norm": 0.328125,
       "learning_rate": 0.0,
+      "loss": 10.1767,
+      "step": 100
     }
   ],
   "logging_steps": 10,
+  "max_steps": 100,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 17,
   "save_steps": 200,
   "stateful_callbacks": {
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 81831709900800.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9629b97ec393ef093e4840cb93f3f7c6eafc6cfa1b3cb229133bf54718ca98cb
 size 6840

 version https://git-lfs.github.com/spec/v1
+oid sha256:11cf48786efac37806223cc8882d4253d84ff8c3599dc92c9fda0e12bc8a651f
 size 6840