aseratus1 committed
Commit 523636e · verified · 1 Parent(s): d9a953f

Training in progress, step 230, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:90cf86756df572135f03c0f7b02e368ed11704950657d207cb96e852a67974c0
+oid sha256:67781916596e6cc2f2fac9ce421bfaa2f0acabebdb950101d67bb016d1ab4fa2
 size 645975704
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c49898add5a21782109bcea17e094f097a2730fa5548b1a51e188fe8213625b0
+oid sha256:abe2309002b0ed2be8f8a2fbe4dd663580b8c9421017827ac1ff4450f7ad68fc
 size 328468404
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9c1ef41deb3863cb4e4ca9f914e8469725fb857615262228d60028b63b24c217
+oid sha256:7c1050e20827c53ea3b03c4a195af3abf926f5fe4840e67e6093132eeba079cf
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5a94566b02bf28490606f2fb9cb895b81b322144d6c373560bb6cc082ffc05f1
+oid sha256:fe9cf7f3790980c81262679dcebf11d7a584b88476e5dbdf63e7cf839a7cd620
 size 1064
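
Note: the four files above are Git LFS pointers, not the binaries themselves. Each pointer records only the spec version, the sha256 oid of the real object, and its byte size; this commit swaps the oids while the sizes stay the same. Below is a minimal sketch of how a downloaded object can be checked against its pointer, assuming only the three-line pointer layout shown above (paths and function names are illustrative, not part of this repo).

# Minimal sketch: verify a downloaded checkpoint file against its Git LFS pointer.
import hashlib
from pathlib import Path

def read_lfs_pointer(pointer_path):
    # Parse the "key value" lines of an LFS pointer file into a dict.
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def sha256_of(path, chunk_size=1 << 20):
    # Hash in chunks so large checkpoint files are not read into memory at once.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

def verify_lfs_object(pointer_path, object_path):
    # True when the object's size and sha256 match what the pointer records.
    pointer = read_lfs_pointer(pointer_path)
    expected = pointer["oid"].split(":", 1)[1]  # strip the "sha256:" prefix
    size_ok = Path(object_path).stat().st_size == int(pointer["size"])
    return size_ok and sha256_of(object_path) == expected

# Example (illustrative paths): a download of adapter_model.safetensors from this
# commit should hash to the new oid 67781916... recorded in the pointer above.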
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.10709268599748611,
   "best_model_checkpoint": "miner_id_24/checkpoint-200",
-  "epoch": 0.8705114254624592,
+  "epoch": 1.001088139281828,
   "eval_steps": 100,
-  "global_step": 200,
+  "global_step": 230,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1431,6 +1431,216 @@
       "eval_samples_per_second": 12.048,
       "eval_steps_per_second": 3.02,
       "step": 200
+    },
+    {
+      "epoch": 0.8748639825897715,
+      "grad_norm": 0.04893573746085167,
+      "learning_rate": 4.632065271606756e-06,
+      "loss": 0.0464,
+      "step": 201
+    },
+    {
+      "epoch": 0.8792165397170838,
+      "grad_norm": 0.054525069892406464,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 0.0653,
+      "step": 202
+    },
+    {
+      "epoch": 0.8835690968443961,
+      "grad_norm": 0.06352470815181732,
+      "learning_rate": 4.023611372427471e-06,
+      "loss": 0.07,
+      "step": 203
+    },
+    {
+      "epoch": 0.8879216539717084,
+      "grad_norm": 0.05715398117899895,
+      "learning_rate": 3.734784976300165e-06,
+      "loss": 0.0565,
+      "step": 204
+    },
+    {
+      "epoch": 0.8922742110990207,
+      "grad_norm": 0.06614100933074951,
+      "learning_rate": 3.4563125677897932e-06,
+      "loss": 0.0706,
+      "step": 205
+    },
+    {
+      "epoch": 0.8966267682263329,
+      "grad_norm": 0.07538996636867523,
+      "learning_rate": 3.18825646801314e-06,
+      "loss": 0.081,
+      "step": 206
+    },
+    {
+      "epoch": 0.9009793253536452,
+      "grad_norm": 0.07782811671495438,
+      "learning_rate": 2.930676666954846e-06,
+      "loss": 0.0761,
+      "step": 207
+    },
+    {
+      "epoch": 0.9053318824809575,
+      "grad_norm": 0.07623429596424103,
+      "learning_rate": 2.6836308100417873e-06,
+      "loss": 0.0808,
+      "step": 208
+    },
+    {
+      "epoch": 0.9096844396082698,
+      "grad_norm": 0.06650186330080032,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.0677,
+      "step": 209
+    },
+    {
+      "epoch": 0.9140369967355821,
+      "grad_norm": 0.0781843438744545,
+      "learning_rate": 2.221359710692961e-06,
+      "loss": 0.0835,
+      "step": 210
+    },
+    {
+      "epoch": 0.9183895538628944,
+      "grad_norm": 0.08650253713130951,
+      "learning_rate": 2.006237922855553e-06,
+      "loss": 0.0895,
+      "step": 211
+    },
+    {
+      "epoch": 0.9227421109902068,
+      "grad_norm": 0.08480913937091827,
+      "learning_rate": 1.8018569652073381e-06,
+      "loss": 0.0849,
+      "step": 212
+    },
+    {
+      "epoch": 0.9270946681175191,
+      "grad_norm": 0.07606717199087143,
+      "learning_rate": 1.6082625774666794e-06,
+      "loss": 0.0723,
+      "step": 213
+    },
+    {
+      "epoch": 0.9314472252448314,
+      "grad_norm": 0.08265078812837601,
+      "learning_rate": 1.4254980853566247e-06,
+      "loss": 0.0782,
+      "step": 214
+    },
+    {
+      "epoch": 0.9357997823721437,
+      "grad_norm": 0.07758332788944244,
+      "learning_rate": 1.2536043909088191e-06,
+      "loss": 0.0664,
+      "step": 215
+    },
+    {
+      "epoch": 0.940152339499456,
+      "grad_norm": 0.08595745265483856,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 0.0833,
+      "step": 216
+    },
+    {
+      "epoch": 0.9445048966267682,
+      "grad_norm": 0.09878195077180862,
+      "learning_rate": 9.42580830291373e-07,
+      "loss": 0.1022,
+      "step": 217
+    },
+    {
+      "epoch": 0.9488574537540805,
+      "grad_norm": 0.1008504182100296,
+      "learning_rate": 8.035205700685167e-07,
+      "loss": 0.0786,
+      "step": 218
+    },
+    {
+      "epoch": 0.9532100108813928,
+      "grad_norm": 0.10530146956443787,
+      "learning_rate": 6.75470303823933e-07,
+      "loss": 0.0882,
+      "step": 219
+    },
+    {
+      "epoch": 0.9575625680087051,
+      "grad_norm": 0.11855965107679367,
+      "learning_rate": 5.584586887435739e-07,
+      "loss": 0.0952,
+      "step": 220
+    },
+    {
+      "epoch": 0.9619151251360174,
+      "grad_norm": 0.129081130027771,
+      "learning_rate": 4.52511911603265e-07,
+      "loss": 0.0809,
+      "step": 221
+    },
+    {
+      "epoch": 0.9662676822633297,
+      "grad_norm": 0.16669493913650513,
+      "learning_rate": 3.576536829081323e-07,
+      "loss": 0.1204,
+      "step": 222
+    },
+    {
+      "epoch": 0.970620239390642,
+      "grad_norm": 0.16019439697265625,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 0.1158,
+      "step": 223
+    },
+    {
+      "epoch": 0.9749727965179543,
+      "grad_norm": 0.2115449756383896,
+      "learning_rate": 2.012853002380466e-07,
+      "loss": 0.1401,
+      "step": 224
+    },
+    {
+      "epoch": 0.9793253536452666,
+      "grad_norm": 0.23436211049556732,
+      "learning_rate": 1.3981014094099353e-07,
+      "loss": 0.1593,
+      "step": 225
+    },
+    {
+      "epoch": 0.9836779107725789,
+      "grad_norm": 0.24148912727832794,
+      "learning_rate": 8.949351161324227e-08,
+      "loss": 0.1317,
+      "step": 226
+    },
+    {
+      "epoch": 0.9880304678998912,
+      "grad_norm": 0.30770501494407654,
+      "learning_rate": 5.0346672934270534e-08,
+      "loss": 0.162,
+      "step": 227
+    },
+    {
+      "epoch": 0.9923830250272034,
+      "grad_norm": 0.2943860590457916,
+      "learning_rate": 2.237838582483387e-08,
+      "loss": 0.1882,
+      "step": 228
+    },
+    {
+      "epoch": 0.9967355821545157,
+      "grad_norm": 0.3997349441051483,
+      "learning_rate": 5.594909486328348e-09,
+      "loss": 0.2187,
+      "step": 229
+    },
+    {
+      "epoch": 1.001088139281828,
+      "grad_norm": 0.5895123481750488,
+      "learning_rate": 0.0,
+      "loss": 0.2661,
+      "step": 230
     }
   ],
   "logging_steps": 1,
@@ -1454,12 +1664,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.238351480868045e+17,
+  "total_flos": 3.7320134588104704e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null