Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d6c7086406fbeee4b24aaeb21d47af038698307ddab376567cb2eeed61df0394
 size 69527352

 version https://git-lfs.github.com/spec/v1
+oid sha256:0523d76765ca4cd622289ee0640efa6aa34e1774dea35dcf0070831154c6fff3
 size 69527352

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d1c0c599180a4f0135147e65da2a6b10a2561fb8e652e63d2df27c3166385344
 size 35778452

 version https://git-lfs.github.com/spec/v1
+oid sha256:76a57303386d0d82dc7b0f192d8d47fdae4b04fea3235f5fd0e62f279fb19f6c
 size 35778452

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a0fa23faa1fbc39a805d62dff34b0f4106192b8084e55cca7a36ab7f86d8e76a
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:423d2bed4877a4fee141ad61c043eb42a11b4a716dd9a654808bedc70a2ada74
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:61d92ba00805f178e64a4a7fc7ebf59bf8a4f5750cc8c2818832081930f2c83e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a27b052646dcb561cbd68156c30bd466ce59bda64cf3c8eba9c3c1113af9827c
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.0205990076065063,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.14184397163120568,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 65.491,
       "eval_steps_per_second": 16.377,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.961492606877696e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.8490656018257141,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.18912529550827423,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 65.491,
       "eval_steps_per_second": 16.377,
       "step": 150
+    },
+    {
+      "epoch": 0.14278959810874706,
+      "grad_norm": 0.45450514554977417,
+      "learning_rate": 4.995066821070679e-05,
+      "loss": 0.1451,
+      "step": 151
+    },
+    {
+      "epoch": 0.14373522458628843,
+      "grad_norm": 0.5641042590141296,
+      "learning_rate": 4.980286753286195e-05,
+      "loss": 0.19,
+      "step": 152
+    },
+    {
+      "epoch": 0.14468085106382977,
+      "grad_norm": 0.6517515182495117,
+      "learning_rate": 4.9557181268217227e-05,
+      "loss": 0.2196,
+      "step": 153
+    },
+    {
+      "epoch": 0.14562647754137115,
+      "grad_norm": 0.5124554634094238,
+      "learning_rate": 4.9214579028215776e-05,
+      "loss": 0.2285,
+      "step": 154
+    },
+    {
+      "epoch": 0.14657210401891252,
+      "grad_norm": 0.4333844482898712,
+      "learning_rate": 4.877641290737884e-05,
+      "loss": 0.2272,
+      "step": 155
+    },
+    {
+      "epoch": 0.1475177304964539,
+      "grad_norm": 0.3877612352371216,
+      "learning_rate": 4.8244412147206284e-05,
+      "loss": 0.3144,
+      "step": 156
+    },
+    {
+      "epoch": 0.14846335697399526,
+      "grad_norm": 0.3211458921432495,
+      "learning_rate": 4.762067631165049e-05,
+      "loss": 0.3578,
+      "step": 157
+    },
+    {
+      "epoch": 0.14940898345153664,
+      "grad_norm": 0.24879109859466553,
+      "learning_rate": 4.690766700109659e-05,
+      "loss": 0.2756,
+      "step": 158
+    },
+    {
+      "epoch": 0.150354609929078,
+      "grad_norm": 0.22662314772605896,
+      "learning_rate": 4.610819813755038e-05,
+      "loss": 0.2839,
+      "step": 159
+    },
+    {
+      "epoch": 0.15130023640661938,
+      "grad_norm": 0.23902210593223572,
+      "learning_rate": 4.522542485937369e-05,
+      "loss": 0.3703,
+      "step": 160
+    },
+    {
+      "epoch": 0.15224586288416075,
+      "grad_norm": 0.23963478207588196,
+      "learning_rate": 4.426283106939474e-05,
+      "loss": 0.3972,
+      "step": 161
+    },
+    {
+      "epoch": 0.15319148936170213,
+      "grad_norm": 0.23576639592647552,
+      "learning_rate": 4.3224215685535294e-05,
+      "loss": 0.3027,
+      "step": 162
+    },
+    {
+      "epoch": 0.1541371158392435,
+      "grad_norm": 0.33861732482910156,
+      "learning_rate": 4.211367764821722e-05,
+      "loss": 0.481,
+      "step": 163
+    },
+    {
+      "epoch": 0.15508274231678487,
+      "grad_norm": 0.34786704182624817,
+      "learning_rate": 4.093559974371725e-05,
+      "loss": 0.441,
+      "step": 164
+    },
+    {
+      "epoch": 0.15602836879432624,
+      "grad_norm": 0.5184177160263062,
+      "learning_rate": 3.969463130731183e-05,
+      "loss": 0.5629,
+      "step": 165
+    },
+    {
+      "epoch": 0.15697399527186762,
+      "grad_norm": 0.38847318291664124,
+      "learning_rate": 3.8395669874474915e-05,
+      "loss": 0.5856,
+      "step": 166
+    },
+    {
+      "epoch": 0.157919621749409,
+      "grad_norm": 0.4933064877986908,
+      "learning_rate": 3.704384185254288e-05,
+      "loss": 0.6682,
+      "step": 167
+    },
+    {
+      "epoch": 0.15886524822695036,
+      "grad_norm": 0.4688006043434143,
+      "learning_rate": 3.564448228912682e-05,
+      "loss": 0.6222,
+      "step": 168
+    },
+    {
+      "epoch": 0.15981087470449173,
+      "grad_norm": 0.7410577535629272,
+      "learning_rate": 3.4203113817116957e-05,
+      "loss": 0.8418,
+      "step": 169
+    },
+    {
+      "epoch": 0.1607565011820331,
+      "grad_norm": 0.7604397535324097,
+      "learning_rate": 3.272542485937369e-05,
+      "loss": 1.1219,
+      "step": 170
+    },
+    {
+      "epoch": 0.16170212765957448,
+      "grad_norm": 0.6462581753730774,
+      "learning_rate": 3.121724717912138e-05,
+      "loss": 0.9533,
+      "step": 171
+    },
+    {
+      "epoch": 0.16264775413711585,
+      "grad_norm": 0.2718241810798645,
+      "learning_rate": 2.9684532864643122e-05,
+      "loss": 0.5001,
+      "step": 172
+    },
+    {
+      "epoch": 0.1635933806146572,
+      "grad_norm": 0.35057616233825684,
+      "learning_rate": 2.8133330839107608e-05,
+      "loss": 0.5601,
+      "step": 173
+    },
+    {
+      "epoch": 0.16453900709219857,
+      "grad_norm": 0.5394273996353149,
+      "learning_rate": 2.656976298823284e-05,
+      "loss": 0.8314,
+      "step": 174
+    },
+    {
+      "epoch": 0.16548463356973994,
+      "grad_norm": 0.3971342444419861,
+      "learning_rate": 2.5e-05,
+      "loss": 0.7177,
+      "step": 175
+    },
+    {
+      "epoch": 0.16643026004728131,
+      "grad_norm": 0.3941769003868103,
+      "learning_rate": 2.3430237011767167e-05,
+      "loss": 0.6324,
+      "step": 176
+    },
+    {
+      "epoch": 0.1673758865248227,
+      "grad_norm": 0.45238277316093445,
+      "learning_rate": 2.186666916089239e-05,
+      "loss": 0.7953,
+      "step": 177
+    },
+    {
+      "epoch": 0.16832151300236406,
+      "grad_norm": 0.5700445771217346,
+      "learning_rate": 2.031546713535688e-05,
+      "loss": 0.859,
+      "step": 178
+    },
+    {
+      "epoch": 0.16926713947990543,
+      "grad_norm": 0.47867706418037415,
+      "learning_rate": 1.8782752820878634e-05,
+      "loss": 0.8502,
+      "step": 179
+    },
+    {
+      "epoch": 0.1702127659574468,
+      "grad_norm": 0.42379331588745117,
+      "learning_rate": 1.7274575140626318e-05,
+      "loss": 0.8364,
+      "step": 180
+    },
+    {
+      "epoch": 0.17115839243498818,
+      "grad_norm": 0.4100590944290161,
+      "learning_rate": 1.5796886182883053e-05,
+      "loss": 0.7299,
+      "step": 181
+    },
+    {
+      "epoch": 0.17210401891252955,
+      "grad_norm": 0.5437663197517395,
+      "learning_rate": 1.4355517710873184e-05,
+      "loss": 0.7789,
+      "step": 182
+    },
+    {
+      "epoch": 0.17304964539007092,
+      "grad_norm": 0.47485315799713135,
+      "learning_rate": 1.2956158147457115e-05,
+      "loss": 0.8491,
+      "step": 183
+    },
+    {
+      "epoch": 0.1739952718676123,
+      "grad_norm": 0.48820245265960693,
+      "learning_rate": 1.1604330125525079e-05,
+      "loss": 0.926,
+      "step": 184
+    },
+    {
+      "epoch": 0.17494089834515367,
+      "grad_norm": 0.32581862807273865,
+      "learning_rate": 1.0305368692688174e-05,
+      "loss": 0.5398,
+      "step": 185
+    },
+    {
+      "epoch": 0.17588652482269504,
+      "grad_norm": 0.4086638391017914,
+      "learning_rate": 9.064400256282757e-06,
+      "loss": 0.8114,
+      "step": 186
+    },
+    {
+      "epoch": 0.1768321513002364,
+      "grad_norm": 0.4679095447063446,
+      "learning_rate": 7.886322351782783e-06,
+      "loss": 0.8871,
+      "step": 187
+    },
+    {
+      "epoch": 0.17777777777777778,
+      "grad_norm": 0.4896736145019531,
+      "learning_rate": 6.775784314464717e-06,
+      "loss": 0.81,
+      "step": 188
+    },
+    {
+      "epoch": 0.17872340425531916,
+      "grad_norm": 0.4382385015487671,
+      "learning_rate": 5.737168930605272e-06,
+      "loss": 0.8284,
+      "step": 189
+    },
+    {
+      "epoch": 0.17966903073286053,
+      "grad_norm": 0.5600558519363403,
+      "learning_rate": 4.7745751406263165e-06,
+      "loss": 1.0058,
+      "step": 190
+    },
+    {
+      "epoch": 0.1806146572104019,
+      "grad_norm": 0.5055614113807678,
+      "learning_rate": 3.891801862449629e-06,
+      "loss": 0.8065,
+      "step": 191
+    },
+    {
+      "epoch": 0.18156028368794327,
+      "grad_norm": 0.6402313113212585,
+      "learning_rate": 3.092332998903416e-06,
+      "loss": 1.0421,
+      "step": 192
+    },
+    {
+      "epoch": 0.18250591016548465,
+      "grad_norm": 0.5336678624153137,
+      "learning_rate": 2.379323688349516e-06,
+      "loss": 0.8897,
+      "step": 193
+    },
+    {
+      "epoch": 0.183451536643026,
+      "grad_norm": 0.5516868829727173,
+      "learning_rate": 1.7555878527937164e-06,
+      "loss": 0.8386,
+      "step": 194
+    },
+    {
+      "epoch": 0.18439716312056736,
+      "grad_norm": 0.6657341718673706,
+      "learning_rate": 1.2235870926211619e-06,
+      "loss": 1.1191,
+      "step": 195
+    },
+    {
+      "epoch": 0.18534278959810874,
+      "grad_norm": 0.5750419497489929,
+      "learning_rate": 7.854209717842231e-07,
+      "loss": 1.0913,
+      "step": 196
+    },
+    {
+      "epoch": 0.1862884160756501,
+      "grad_norm": 0.4884861409664154,
+      "learning_rate": 4.4281873178278475e-07,
+      "loss": 0.976,
+      "step": 197
+    },
+    {
+      "epoch": 0.18723404255319148,
+      "grad_norm": 0.5465104579925537,
+      "learning_rate": 1.9713246713805588e-07,
+      "loss": 0.8359,
+      "step": 198
+    },
+    {
+      "epoch": 0.18817966903073285,
+      "grad_norm": 0.6062586903572083,
+      "learning_rate": 4.9331789293211026e-08,
+      "loss": 0.9467,
+      "step": 199
+    },
+    {
+      "epoch": 0.18912529550827423,
+      "grad_norm": 0.7774025201797485,
+      "learning_rate": 0.0,
+      "loss": 1.1784,
+      "step": 200
+    },
+    {
+      "epoch": 0.18912529550827423,
+      "eval_loss": 0.8490656018257141,
+      "eval_runtime": 54.2258,
+      "eval_samples_per_second": 65.707,
+      "eval_steps_per_second": 16.431,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.614235570896896e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null