Training in progress, step 300, checkpoint

Browse files

Files changed (12) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/rng_state_4.pth +1 -1
last-checkpoint/rng_state_5.pth +1 -1
last-checkpoint/rng_state_6.pth +1 -1
last-checkpoint/rng_state_7.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +713 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:80511fa69b78427062f5dacff78510fd85d8c97429269198dd63544d1b02c1b7
 size 341141032

 version https://git-lfs.github.com/spec/v1
+oid sha256:e6fd325c9c8f4d7dde06717edc7511070769418008c28d00119c090d6f0b7f68
 size 341141032

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bc232cd33d5b2c20be632ba576eee501a40579768b1a0340d9f4445d5260f899
 size 682673458

 version https://git-lfs.github.com/spec/v1
+oid sha256:1348db24f3e478f81922e312fe908cb739c4ee9e150ad2f036ab69be712443a9
 size 682673458

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:71822174d96d135fc0b9f223212f55b7edf1ef1a0a0d835e55d01eac85ab2f72
 size 16048

 version https://git-lfs.github.com/spec/v1
+oid sha256:8829ed70b2030302656436ad3ce9c06239dc272781dfeb2182af49dba9382ced
 size 16048

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:be4680e97d3a9e0871eaabeb03847c28abda43c3c1bd154fe6b5422614b6d297
 size 16048

 version https://git-lfs.github.com/spec/v1
+oid sha256:07ca875b62a1014d462f3dfbedd9672aacac54f278aad3dbfa56502e6d8980ab
 size 16048

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eba37faedf75f6fd1d456545a4ba0c5ac2d9af2a1f6fa4a76be4be9f08e32332
 size 16048

 version https://git-lfs.github.com/spec/v1
+oid sha256:0c68c3fb6c675fb82352432cbb00106f3cc3d144b27bc31460fa7671329b310e
 size 16048

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:87b25e0e7c17435010f4f72ba988546bebe35fcdf164a465b8831a4b00564ec1
 size 16048

 version https://git-lfs.github.com/spec/v1
+oid sha256:995034fad8cd98fd8cc7b08e9388f5c2c3ffc6f5ffc8b38220e45ff9f33fe319
 size 16048

last-checkpoint/rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f5d33031e10ce21e8b98807574aa705a2f51427de06fae4f92633256df4abfa9
 size 16048

 version https://git-lfs.github.com/spec/v1
+oid sha256:6ff4478e39984dc2091dfafc378a1bae8f663f5ae6eb146d15efe3a27b8e0984
 size 16048

last-checkpoint/rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b2f5232d002427e6f13c513ada2abcd0b67df496ceb133569ccef6be1add8249
 size 16048

 version https://git-lfs.github.com/spec/v1
+oid sha256:ac6cbcc62b40b33b44f268770d73327c7327607c19cfdd79854e8fb00ff2459d
 size 16048

last-checkpoint/rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:78ec8bd95ebe73d244465d6aea42b4a22c575bb5c107734ebbe57bebfcfa5e1e
 size 16048

 version https://git-lfs.github.com/spec/v1
+oid sha256:fd4125a97cf9417ec9195cefebfe2739888b6110a9ec0efda3deecf9d22e99e1
 size 16048

last-checkpoint/rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:79725f1e86991cb73c3f4c1e26c2ad9229e68bb920f9fe25cffd4abbb1ba1468
 size 16048

 version https://git-lfs.github.com/spec/v1
+oid sha256:5f72bd0d8d41bcc931626834be67ca7ce72d528b57449f01d7b596f92453186a
 size 16048

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:92f7d6c7692f3a66ef5b01cc1469ac8ce65f6b72724476d8637bb6fd7a582e68
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fa3cb9fdd7c70b3d8d2084f26b1ba813283d06e83c4489733dc372cf0bfcf36f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": 200,
   "best_metric": 0.09608737379312515,
   "best_model_checkpoint": "miner_id_24/checkpoint-200",
-  "epoch": 4.2513089005235605,
   "eval_steps": 100,
-  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1432,6 +1432,714 @@
       "eval_samples_per_second": 42.213,
       "eval_steps_per_second": 0.331,
       "step": 200
     }
   ],
   "logging_steps": 1,
@@ -1446,7 +2154,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -1455,12 +2163,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.381203205633868e+18,
   "train_batch_size": 3,
   "trial_name": null,
   "trial_params": null

   "best_global_step": 200,
   "best_metric": 0.09608737379312515,
   "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 6.37696335078534,
   "eval_steps": 100,
+  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 42.213,
       "eval_steps_per_second": 0.331,
       "step": 200
+    },
+    {
+      "epoch": 4.272251308900524,
+      "grad_norm": 0.10457887500524521,
+      "learning_rate": 0.0001654277183310921,
+      "loss": 0.082,
+      "step": 201
+    },
+    {
+      "epoch": 4.293193717277487,
+      "grad_norm": 0.10059670358896255,
+      "learning_rate": 0.00016478324743627101,
+      "loss": 0.0858,
+      "step": 202
+    },
+    {
+      "epoch": 4.31413612565445,
+      "grad_norm": 0.11438459903001785,
+      "learning_rate": 0.00016413410611644825,
+      "loss": 0.084,
+      "step": 203
+    },
+    {
+      "epoch": 4.335078534031414,
+      "grad_norm": 0.06561236828565598,
+      "learning_rate": 0.00016348034117023258,
+      "loss": 0.0822,
+      "step": 204
+    },
+    {
+      "epoch": 4.356020942408377,
+      "grad_norm": 0.11459755152463913,
+      "learning_rate": 0.00016282199972956425,
+      "loss": 0.0826,
+      "step": 205
+    },
+    {
+      "epoch": 4.37696335078534,
+      "grad_norm": 0.07275859266519547,
+      "learning_rate": 0.00016215912925631723,
+      "loss": 0.081,
+      "step": 206
+    },
+    {
+      "epoch": 4.397905759162303,
+      "grad_norm": 0.10688365250825882,
+      "learning_rate": 0.00016149177753887746,
+      "loss": 0.0804,
+      "step": 207
+    },
+    {
+      "epoch": 4.418848167539267,
+      "grad_norm": 0.07950358837842941,
+      "learning_rate": 0.00016081999268869766,
+      "loss": 0.0817,
+      "step": 208
+    },
+    {
+      "epoch": 4.439790575916231,
+      "grad_norm": 0.09884137660264969,
+      "learning_rate": 0.00016014382313682881,
+      "loss": 0.0818,
+      "step": 209
+    },
+    {
+      "epoch": 4.460732984293194,
+      "grad_norm": 0.09773270040750504,
+      "learning_rate": 0.00015946331763042867,
+      "loss": 0.0841,
+      "step": 210
+    },
+    {
+      "epoch": 4.481675392670157,
+      "grad_norm": 0.07958532869815826,
+      "learning_rate": 0.00015877852522924732,
+      "loss": 0.081,
+      "step": 211
+    },
+    {
+      "epoch": 4.50261780104712,
+      "grad_norm": 0.10386484861373901,
+      "learning_rate": 0.0001580894953020904,
+      "loss": 0.0821,
+      "step": 212
+    },
+    {
+      "epoch": 4.523560209424084,
+      "grad_norm": 0.0741114616394043,
+      "learning_rate": 0.00015739627752325996,
+      "loss": 0.081,
+      "step": 213
+    },
+    {
+      "epoch": 4.544502617801047,
+      "grad_norm": 0.10303428024053574,
+      "learning_rate": 0.00015669892186897318,
+      "loss": 0.0811,
+      "step": 214
+    },
+    {
+      "epoch": 4.56544502617801,
+      "grad_norm": 0.07302123308181763,
+      "learning_rate": 0.00015599747861375955,
+      "loss": 0.0824,
+      "step": 215
+    },
+    {
+      "epoch": 4.5863874345549736,
+      "grad_norm": 0.09889702498912811,
+      "learning_rate": 0.00015529199832683635,
+      "loss": 0.0798,
+      "step": 216
+    },
+    {
+      "epoch": 4.607329842931938,
+      "grad_norm": 0.0839948058128357,
+      "learning_rate": 0.00015458253186846301,
+      "loss": 0.084,
+      "step": 217
+    },
+    {
+      "epoch": 4.628272251308901,
+      "grad_norm": 0.07695591449737549,
+      "learning_rate": 0.0001538691303862744,
+      "loss": 0.0823,
+      "step": 218
+    },
+    {
+      "epoch": 4.649214659685864,
+      "grad_norm": 0.09040986001491547,
+      "learning_rate": 0.0001531518453115934,
+      "loss": 0.0783,
+      "step": 219
+    },
+    {
+      "epoch": 4.670157068062827,
+      "grad_norm": 0.09041474014520645,
+      "learning_rate": 0.00015243072835572318,
+      "loss": 0.0825,
+      "step": 220
+    },
+    {
+      "epoch": 4.69109947643979,
+      "grad_norm": 0.08503681421279907,
+      "learning_rate": 0.00015170583150621905,
+      "loss": 0.0818,
+      "step": 221
+    },
+    {
+      "epoch": 4.712041884816754,
+      "grad_norm": 0.08206664770841599,
+      "learning_rate": 0.00015097720702314055,
+      "loss": 0.0799,
+      "step": 222
+    },
+    {
+      "epoch": 4.732984293193717,
+      "grad_norm": 0.08691777288913727,
+      "learning_rate": 0.00015024490743528393,
+      "loss": 0.0818,
+      "step": 223
+    },
+    {
+      "epoch": 4.7539267015706805,
+      "grad_norm": 0.07108013331890106,
+      "learning_rate": 0.00014950898553639505,
+      "loss": 0.0796,
+      "step": 224
+    },
+    {
+      "epoch": 4.774869109947644,
+      "grad_norm": 0.09734012186527252,
+      "learning_rate": 0.00014876949438136347,
+      "loss": 0.0848,
+      "step": 225
+    },
+    {
+      "epoch": 4.795811518324607,
+      "grad_norm": 0.07660133391618729,
+      "learning_rate": 0.00014802648728239742,
+      "loss": 0.0823,
+      "step": 226
+    },
+    {
+      "epoch": 4.816753926701571,
+      "grad_norm": 0.0771099254488945,
+      "learning_rate": 0.0001472800178051805,
+      "loss": 0.0816,
+      "step": 227
+    },
+    {
+      "epoch": 4.837696335078534,
+      "grad_norm": 0.08631302416324615,
+      "learning_rate": 0.00014653013976500975,
+      "loss": 0.0824,
+      "step": 228
+    },
+    {
+      "epoch": 4.858638743455497,
+      "grad_norm": 0.07344726473093033,
+      "learning_rate": 0.00014577690722291622,
+      "loss": 0.0785,
+      "step": 229
+    },
+    {
+      "epoch": 4.879581151832461,
+      "grad_norm": 0.08363424241542816,
+      "learning_rate": 0.00014502037448176734,
+      "loss": 0.0796,
+      "step": 230
+    },
+    {
+      "epoch": 4.900523560209424,
+      "grad_norm": 0.07857396453619003,
+      "learning_rate": 0.00014426059608235208,
+      "loss": 0.0806,
+      "step": 231
+    },
+    {
+      "epoch": 4.9214659685863875,
+      "grad_norm": 0.08594755083322525,
+      "learning_rate": 0.00014349762679944896,
+      "loss": 0.0812,
+      "step": 232
+    },
+    {
+      "epoch": 4.942408376963351,
+      "grad_norm": 0.06925872713327408,
+      "learning_rate": 0.00014273152163787726,
+      "loss": 0.0808,
+      "step": 233
+    },
+    {
+      "epoch": 4.963350785340314,
+      "grad_norm": 0.08095414191484451,
+      "learning_rate": 0.0001419623358285314,
+      "loss": 0.0796,
+      "step": 234
+    },
+    {
+      "epoch": 4.984293193717278,
+      "grad_norm": 0.07342205196619034,
+      "learning_rate": 0.0001411901248243993,
+      "loss": 0.0806,
+      "step": 235
+    },
+    {
+      "epoch": 5.020942408376963,
+      "grad_norm": 0.23300093412399292,
+      "learning_rate": 0.00014041494429656442,
+      "loss": 0.1565,
+      "step": 236
+    },
+    {
+      "epoch": 5.041884816753926,
+      "grad_norm": 0.12772393226623535,
+      "learning_rate": 0.0001396368501301925,
+      "loss": 0.0732,
+      "step": 237
+    },
+    {
+      "epoch": 5.06282722513089,
+      "grad_norm": 0.10052972286939621,
+      "learning_rate": 0.00013885589842050253,
+      "loss": 0.0738,
+      "step": 238
+    },
+    {
+      "epoch": 5.0837696335078535,
+      "grad_norm": 0.1047709733247757,
+      "learning_rate": 0.00013807214546872256,
+      "loss": 0.075,
+      "step": 239
+    },
+    {
+      "epoch": 5.104712041884817,
+      "grad_norm": 0.10601655393838882,
+      "learning_rate": 0.00013728564777803088,
+      "loss": 0.0737,
+      "step": 240
+    },
+    {
+      "epoch": 5.12565445026178,
+      "grad_norm": 0.08795251697301865,
+      "learning_rate": 0.00013649646204948255,
+      "loss": 0.0717,
+      "step": 241
+    },
+    {
+      "epoch": 5.146596858638744,
+      "grad_norm": 0.10085717588663101,
+      "learning_rate": 0.00013570464517792153,
+      "loss": 0.0751,
+      "step": 242
+    },
+    {
+      "epoch": 5.167539267015707,
+      "grad_norm": 0.09512262046337128,
+      "learning_rate": 0.00013491025424787915,
+      "loss": 0.073,
+      "step": 243
+    },
+    {
+      "epoch": 5.18848167539267,
+      "grad_norm": 0.08350583910942078,
+      "learning_rate": 0.0001341133465294585,
+      "loss": 0.0761,
+      "step": 244
+    },
+    {
+      "epoch": 5.209424083769633,
+      "grad_norm": 0.09553380310535431,
+      "learning_rate": 0.00013331397947420576,
+      "loss": 0.0747,
+      "step": 245
+    },
+    {
+      "epoch": 5.230366492146596,
+      "grad_norm": 0.07822317630052567,
+      "learning_rate": 0.00013251221071096836,
+      "loss": 0.0745,
+      "step": 246
+    },
+    {
+      "epoch": 5.2513089005235605,
+      "grad_norm": 0.10339541733264923,
+      "learning_rate": 0.00013170809804174022,
+      "loss": 0.0762,
+      "step": 247
+    },
+    {
+      "epoch": 5.272251308900524,
+      "grad_norm": 0.08298144489526749,
+      "learning_rate": 0.00013090169943749476,
+      "loss": 0.0751,
+      "step": 248
+    },
+    {
+      "epoch": 5.293193717277487,
+      "grad_norm": 0.0966297909617424,
+      "learning_rate": 0.00013009307303400556,
+      "loss": 0.0724,
+      "step": 249
+    },
+    {
+      "epoch": 5.31413612565445,
+      "grad_norm": 0.09534008800983429,
+      "learning_rate": 0.00012928227712765504,
+      "loss": 0.0731,
+      "step": 250
+    },
+    {
+      "epoch": 5.335078534031414,
+      "grad_norm": 0.08681947737932205,
+      "learning_rate": 0.00012846937017123197,
+      "loss": 0.075,
+      "step": 251
+    },
+    {
+      "epoch": 5.356020942408377,
+      "grad_norm": 0.09559471905231476,
+      "learning_rate": 0.00012765441076971712,
+      "loss": 0.0717,
+      "step": 252
+    },
+    {
+      "epoch": 5.37696335078534,
+      "grad_norm": 0.08022520691156387,
+      "learning_rate": 0.00012683745767605846,
+      "loss": 0.0766,
+      "step": 253
+    },
+    {
+      "epoch": 5.397905759162303,
+      "grad_norm": 0.10284972935914993,
+      "learning_rate": 0.0001260185697869353,
+      "loss": 0.0704,
+      "step": 254
+    },
+    {
+      "epoch": 5.418848167539267,
+      "grad_norm": 0.08318132907152176,
+      "learning_rate": 0.00012519780613851254,
+      "loss": 0.0746,
+      "step": 255
+    },
+    {
+      "epoch": 5.439790575916231,
+      "grad_norm": 0.08917541056871414,
+      "learning_rate": 0.00012437522590218417,
+      "loss": 0.0733,
+      "step": 256
+    },
+    {
+      "epoch": 5.460732984293194,
+      "grad_norm": 0.08834797143936157,
+      "learning_rate": 0.00012355088838030776,
+      "loss": 0.075,
+      "step": 257
+    },
+    {
+      "epoch": 5.481675392670157,
+      "grad_norm": 0.08001340925693512,
+      "learning_rate": 0.00012272485300192902,
+      "loss": 0.0731,
+      "step": 258
+    },
+    {
+      "epoch": 5.50261780104712,
+      "grad_norm": 0.07309938222169876,
+      "learning_rate": 0.00012189717931849731,
+      "loss": 0.0719,
+      "step": 259
+    },
+    {
+      "epoch": 5.523560209424084,
+      "grad_norm": 0.07951314002275467,
+      "learning_rate": 0.00012106792699957263,
+      "loss": 0.0741,
+      "step": 260
+    },
+    {
+      "epoch": 5.544502617801047,
+      "grad_norm": 0.07957018166780472,
+      "learning_rate": 0.00012023715582852357,
+      "loss": 0.0738,
+      "step": 261
+    },
+    {
+      "epoch": 5.56544502617801,
+      "grad_norm": 0.076540008187294,
+      "learning_rate": 0.00011940492569821753,
+      "loss": 0.0714,
+      "step": 262
+    },
+    {
+      "epoch": 5.5863874345549736,
+      "grad_norm": 0.08407393842935562,
+      "learning_rate": 0.00011857129660670281,
+      "loss": 0.0777,
+      "step": 263
+    },
+    {
+      "epoch": 5.607329842931938,
+      "grad_norm": 0.0788414478302002,
+      "learning_rate": 0.00011773632865288309,
+      "loss": 0.0732,
+      "step": 264
+    },
+    {
+      "epoch": 5.628272251308901,
+      "grad_norm": 0.0724525973200798,
+      "learning_rate": 0.00011690008203218493,
+      "loss": 0.0783,
+      "step": 265
+    },
+    {
+      "epoch": 5.649214659685864,
+      "grad_norm": 0.0882321372628212,
+      "learning_rate": 0.00011606261703221772,
+      "loss": 0.0781,
+      "step": 266
+    },
+    {
+      "epoch": 5.670157068062827,
+      "grad_norm": 0.07683246582746506,
+      "learning_rate": 0.00011522399402842783,
+      "loss": 0.0706,
+      "step": 267
+    },
+    {
+      "epoch": 5.69109947643979,
+      "grad_norm": 0.07433947920799255,
+      "learning_rate": 0.00011438427347974554,
+      "loss": 0.074,
+      "step": 268
+    },
+    {
+      "epoch": 5.712041884816754,
+      "grad_norm": 0.07308503985404968,
+      "learning_rate": 0.00011354351592422665,
+      "loss": 0.0729,
+      "step": 269
+    },
+    {
+      "epoch": 5.732984293193717,
+      "grad_norm": 0.08603333681821823,
+      "learning_rate": 0.00011270178197468789,
+      "loss": 0.0752,
+      "step": 270
+    },
+    {
+      "epoch": 5.7539267015706805,
+      "grad_norm": 0.07910820841789246,
+      "learning_rate": 0.00011185913231433733,
+      "loss": 0.0752,
+      "step": 271
+    },
+    {
+      "epoch": 5.774869109947644,
+      "grad_norm": 0.07771284133195877,
+      "learning_rate": 0.00011101562769239946,
+      "loss": 0.0739,
+      "step": 272
+    },
+    {
+      "epoch": 5.795811518324607,
+      "grad_norm": 0.08137574046850204,
+      "learning_rate": 0.0001101713289197356,
+      "loss": 0.0704,
+      "step": 273
+    },
+    {
+      "epoch": 5.816753926701571,
+      "grad_norm": 0.07771284133195877,
+      "learning_rate": 0.00010932629686445986,
+      "loss": 0.0766,
+      "step": 274
+    },
+    {
+      "epoch": 5.837696335078534,
+      "grad_norm": 0.07269325852394104,
+      "learning_rate": 0.00010848059244755093,
+      "loss": 0.0738,
+      "step": 275
+    },
+    {
+      "epoch": 5.858638743455497,
+      "grad_norm": 0.09434104710817337,
+      "learning_rate": 0.00010763427663846015,
+      "loss": 0.0754,
+      "step": 276
+    },
+    {
+      "epoch": 5.879581151832461,
+      "grad_norm": 0.07986113429069519,
+      "learning_rate": 0.00010678741045071609,
+      "loss": 0.0727,
+      "step": 277
+    },
+    {
+      "epoch": 5.900523560209424,
+      "grad_norm": 0.08152402937412262,
+      "learning_rate": 0.00010594005493752568,
+      "loss": 0.0763,
+      "step": 278
+    },
+    {
+      "epoch": 5.9214659685863875,
+      "grad_norm": 0.08020524680614471,
+      "learning_rate": 0.00010509227118737298,
+      "loss": 0.0728,
+      "step": 279
+    },
+    {
+      "epoch": 5.942408376963351,
+      "grad_norm": 0.08128321915864944,
+      "learning_rate": 0.00010424412031961484,
+      "loss": 0.0726,
+      "step": 280
+    },
+    {
+      "epoch": 5.963350785340314,
+      "grad_norm": 0.09842672944068909,
+      "learning_rate": 0.00010339566348007487,
+      "loss": 0.0738,
+      "step": 281
+    },
+    {
+      "epoch": 5.984293193717278,
+      "grad_norm": 0.0821060761809349,
+      "learning_rate": 0.00010254696183663511,
+      "loss": 0.0741,
+      "step": 282
+    },
+    {
+      "epoch": 6.020942408376963,
+      "grad_norm": 0.24885313212871552,
+      "learning_rate": 0.00010169807657482623,
+      "loss": 0.1464,
+      "step": 283
+    },
+    {
+      "epoch": 6.041884816753926,
+      "grad_norm": 0.10097737610340118,
+      "learning_rate": 0.00010084906889341656,
+      "loss": 0.0664,
+      "step": 284
+    },
+    {
+      "epoch": 6.06282722513089,
+      "grad_norm": 0.10555354505777359,
+      "learning_rate": 0.0001,
+      "loss": 0.0672,
+      "step": 285
+    },
+    {
+      "epoch": 6.0837696335078535,
+      "grad_norm": 0.10018379241228104,
+      "learning_rate": 9.915093110658346e-05,
+      "loss": 0.0678,
+      "step": 286
+    },
+    {
+      "epoch": 6.104712041884817,
+      "grad_norm": 0.09475495666265488,
+      "learning_rate": 9.830192342517379e-05,
+      "loss": 0.0662,
+      "step": 287
+    },
+    {
+      "epoch": 6.12565445026178,
+      "grad_norm": 0.10079528391361237,
+      "learning_rate": 9.745303816336489e-05,
+      "loss": 0.0645,
+      "step": 288
+    },
+    {
+      "epoch": 6.146596858638744,
+      "grad_norm": 0.08908044546842575,
+      "learning_rate": 9.660433651992514e-05,
+      "loss": 0.0671,
+      "step": 289
+    },
+    {
+      "epoch": 6.167539267015707,
+      "grad_norm": 0.10220327973365784,
+      "learning_rate": 9.57558796803852e-05,
+      "loss": 0.0652,
+      "step": 290
+    },
+    {
+      "epoch": 6.18848167539267,
+      "grad_norm": 0.0913078561425209,
+      "learning_rate": 9.490772881262709e-05,
+      "loss": 0.0654,
+      "step": 291
+    },
+    {
+      "epoch": 6.209424083769633,
+      "grad_norm": 0.09089622646570206,
+      "learning_rate": 9.405994506247432e-05,
+      "loss": 0.0659,
+      "step": 292
+    },
+    {
+      "epoch": 6.230366492146596,
+      "grad_norm": 0.10402899235486984,
+      "learning_rate": 9.321258954928393e-05,
+      "loss": 0.0672,
+      "step": 293
+    },
+    {
+      "epoch": 6.2513089005235605,
+      "grad_norm": 0.09207270294427872,
+      "learning_rate": 9.236572336153986e-05,
+      "loss": 0.0688,
+      "step": 294
+    },
+    {
+      "epoch": 6.272251308900524,
+      "grad_norm": 0.10593326389789581,
+      "learning_rate": 9.151940755244912e-05,
+      "loss": 0.0655,
+      "step": 295
+    },
+    {
+      "epoch": 6.293193717277487,
+      "grad_norm": 0.09085794538259506,
+      "learning_rate": 9.067370313554015e-05,
+      "loss": 0.0663,
+      "step": 296
+    },
+    {
+      "epoch": 6.31413612565445,
+      "grad_norm": 0.106470987200737,
+      "learning_rate": 8.982867108026442e-05,
+      "loss": 0.0659,
+      "step": 297
+    },
+    {
+      "epoch": 6.335078534031414,
+      "grad_norm": 0.08805633336305618,
+      "learning_rate": 8.898437230760058e-05,
+      "loss": 0.0672,
+      "step": 298
+    },
+    {
+      "epoch": 6.356020942408377,
+      "grad_norm": 0.10489070415496826,
+      "learning_rate": 8.814086768566272e-05,
+      "loss": 0.0665,
+      "step": 299
+    },
+    {
+      "epoch": 6.37696335078534,
+      "grad_norm": 0.11271199584007263,
+      "learning_rate": 8.729821802531212e-05,
+      "loss": 0.0685,
+      "step": 300
+    },
+    {
+      "epoch": 6.37696335078534,
+      "eval_loss": 0.09648442268371582,
+      "eval_runtime": 36.2137,
+      "eval_samples_per_second": 42.222,
+      "eval_steps_per_second": 0.331,
+      "step": 300
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 8.071804808450802e+18,
   "train_batch_size": 3,
   "trial_name": null,
   "trial_params": null