Training in progress, step 450, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +118 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8c67cb69054ec58aa440717cdfb6bb73eed282ad38d6f1d8876b92f689dce4a3
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:7285228837d70afbfa387f6171edafc7006dac8cd4e41b7cb7140b429aa9a717
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a3277342f65874d847ea16268cf966c19181a740039e3a76dc9dfa826e4bb748
 size 341314644

 version https://git-lfs.github.com/spec/v1
+oid sha256:00488e057a26628043a6015a1be259af639bb0403c85202e8100a743abc9a618
 size 341314644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:835154b511893bb89c75363ca2b5e39fbfa155a8e7afb051eb41d91d2f9e28a0
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:380270587d085a36d75982f02881a60466ebe019ea778ff4ac415d05c45d8d2d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:61b5a82b906ef255ed934ff84d648a93ce52daed15c002b1d3821ff35829eb10
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d396eda8e5b8059a6497aa364287326ac46b674ce3474769e627232a42976f1e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 3.1905009746551514,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.17830609212481427,
   "eval_steps": 150,
-  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -241,6 +241,119 @@
       "eval_samples_per_second": 13.34,
       "eval_steps_per_second": 3.337,
       "step": 300
     }
   ],
   "logging_steps": 10,
@@ -255,7 +368,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -264,12 +377,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.436915563486249e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 3.1905009746551514,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 0.2674591381872214,
   "eval_steps": 150,
+  "global_step": 450,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 13.34,
       "eval_steps_per_second": 3.337,
       "step": 300
+    },
+    {
+      "epoch": 0.18424962852897475,
+      "grad_norm": 2.2458980083465576,
+      "learning_rate": 9.387451666638906e-05,
+      "loss": 3.2819,
+      "step": 310
+    },
+    {
+      "epoch": 0.1901931649331352,
+      "grad_norm": 2.2853057384490967,
+      "learning_rate": 9.340510045702427e-05,
+      "loss": 3.0085,
+      "step": 320
+    },
+    {
+      "epoch": 0.1961367013372957,
+      "grad_norm": 2.456028461456299,
+      "learning_rate": 9.291962020876654e-05,
+      "loss": 2.8806,
+      "step": 330
+    },
+    {
+      "epoch": 0.20208023774145617,
+      "grad_norm": 2.9242990016937256,
+      "learning_rate": 9.241825559574424e-05,
+      "loss": 2.8353,
+      "step": 340
+    },
+    {
+      "epoch": 0.20802377414561665,
+      "grad_norm": 8.866583824157715,
+      "learning_rate": 9.190119217081996e-05,
+      "loss": 2.8203,
+      "step": 350
+    },
+    {
+      "epoch": 0.2139673105497771,
+      "grad_norm": 2.047957420349121,
+      "learning_rate": 9.136862129691838e-05,
+      "loss": 3.3765,
+      "step": 360
+    },
+    {
+      "epoch": 0.2199108469539376,
+      "grad_norm": 2.7228925228118896,
+      "learning_rate": 9.082074007620356e-05,
+      "loss": 3.086,
+      "step": 370
+    },
+    {
+      "epoch": 0.22585438335809807,
+      "grad_norm": 2.6000123023986816,
+      "learning_rate": 9.025775127713232e-05,
+      "loss": 2.8523,
+      "step": 380
+    },
+    {
+      "epoch": 0.23179791976225855,
+      "grad_norm": 2.826296091079712,
+      "learning_rate": 8.967986325941056e-05,
+      "loss": 2.8088,
+      "step": 390
+    },
+    {
+      "epoch": 0.237741456166419,
+      "grad_norm": 13.53122329711914,
+      "learning_rate": 8.908728989688015e-05,
+      "loss": 2.8237,
+      "step": 400
+    },
+    {
+      "epoch": 0.2436849925705795,
+      "grad_norm": 2.1314334869384766,
+      "learning_rate": 8.848025049836522e-05,
+      "loss": 3.3274,
+      "step": 410
+    },
+    {
+      "epoch": 0.24962852897473997,
+      "grad_norm": 2.417358875274658,
+      "learning_rate": 8.785896972650694e-05,
+      "loss": 3.0393,
+      "step": 420
+    },
+    {
+      "epoch": 0.2555720653789004,
+      "grad_norm": 2.595625877380371,
+      "learning_rate": 8.72236775146167e-05,
+      "loss": 2.9731,
+      "step": 430
+    },
+    {
+      "epoch": 0.26151560178306094,
+      "grad_norm": 2.7309069633483887,
+      "learning_rate": 8.657460898157905e-05,
+      "loss": 2.7837,
+      "step": 440
+    },
+    {
+      "epoch": 0.2674591381872214,
+      "grad_norm": 6.042367458343506,
+      "learning_rate": 8.5912004344835e-05,
+      "loss": 2.7581,
+      "step": 450
+    },
+    {
+      "epoch": 0.2674591381872214,
+      "eval_loss": 3.2376186847686768,
+      "eval_runtime": 212.7494,
+      "eval_samples_per_second": 13.321,
+      "eval_steps_per_second": 3.333,
+      "step": 450
     }
   ],
   "logging_steps": 10,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 6.658201808189522e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null