Training in progress, step 400, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +713 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:842be841a84ffd9675d3d829649da3dea60a6f157c1a9e805cbfbdd13df6068f
 size 201353800

 version https://git-lfs.github.com/spec/v1
+oid sha256:1dc391235528041d2fc64c517de0de5a35d6e73291b9575d80be3c327678412e
 size 201353800

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:358595746d9606f881c058f17ec77646119f5640649202e7d0bc913ea2c24d89
 size 102463162

 version https://git-lfs.github.com/spec/v1
+oid sha256:b759274f0d85a5e93ff596ccde0cd1ab9ef3d3a503db9c3340bcdec7982c30b6
 size 102463162

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a2c91750a152d4f302e0a66761e580c0302f41c4bcf24c35e7cf34d49c3d4b8a
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:ea1cdf60dba2aa43fbb9730847255fb9348f31329cac03f340a7414262a148e2
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d02401bbb152e15f1818919a637ab3b8552f6baef86b8a3bc1f3db18041aa551
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ce6af8c2b755ab309a73060b69c0ffea28d5cf03ddedb3dc24064e45dedc183e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.008372417651116848,
   "best_model_checkpoint": "miner_id_24/checkpoint-200",
-  "epoch": 0.24552429667519182,
   "eval_steps": 100,
-  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2139,6 +2139,714 @@
       "eval_samples_per_second": 6.896,
       "eval_steps_per_second": 1.726,
       "step": 300
     }
   ],
   "logging_steps": 1,
@@ -2153,7 +2861,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -2162,12 +2870,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.485099296096256e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.008372417651116848,
   "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.3273657289002558,
   "eval_steps": 100,
+  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 6.896,
       "eval_steps_per_second": 1.726,
       "step": 300
+    },
+    {
+      "epoch": 0.24634271099744245,
+      "grad_norm": 1.1538922786712646,
+      "learning_rate": 0.000193028873651704,
+      "loss": 0.0128,
+      "step": 301
+    },
+    {
+      "epoch": 0.2471611253196931,
+      "grad_norm": 1.4959121942520142,
+      "learning_rate": 0.000192981449234947,
+      "loss": 0.0151,
+      "step": 302
+    },
+    {
+      "epoch": 0.24797953964194375,
+      "grad_norm": 0.7326957583427429,
+      "learning_rate": 0.00019293386991696648,
+      "loss": 0.0074,
+      "step": 303
+    },
+    {
+      "epoch": 0.24879795396419438,
+      "grad_norm": 5.472940921783447,
+      "learning_rate": 0.00019288613577702655,
+      "loss": 0.1211,
+      "step": 304
+    },
+    {
+      "epoch": 0.249616368286445,
+      "grad_norm": 2.403130531311035,
+      "learning_rate": 0.00019283824689464926,
+      "loss": 0.059,
+      "step": 305
+    },
+    {
+      "epoch": 0.25043478260869567,
+      "grad_norm": 4.317528247833252,
+      "learning_rate": 0.00019279020334961447,
+      "loss": 0.1937,
+      "step": 306
+    },
+    {
+      "epoch": 0.2512531969309463,
+      "grad_norm": 2.334576368331909,
+      "learning_rate": 0.0001927420052219597,
+      "loss": 0.1942,
+      "step": 307
+    },
+    {
+      "epoch": 0.25207161125319694,
+      "grad_norm": 2.2626686096191406,
+      "learning_rate": 0.00019269365259198,
+      "loss": 0.0893,
+      "step": 308
+    },
+    {
+      "epoch": 0.2528900255754476,
+      "grad_norm": 1.1748579740524292,
+      "learning_rate": 0.0001926451455402277,
+      "loss": 0.011,
+      "step": 309
+    },
+    {
+      "epoch": 0.2537084398976982,
+      "grad_norm": 0.6484130620956421,
+      "learning_rate": 0.00019259648414751265,
+      "loss": 0.0094,
+      "step": 310
+    },
+    {
+      "epoch": 0.25452685421994886,
+      "grad_norm": 0.47816258668899536,
+      "learning_rate": 0.00019254766849490152,
+      "loss": 0.0062,
+      "step": 311
+    },
+    {
+      "epoch": 0.25534526854219947,
+      "grad_norm": 3.412865161895752,
+      "learning_rate": 0.00019249869866371817,
+      "loss": 0.1235,
+      "step": 312
+    },
+    {
+      "epoch": 0.2561636828644501,
+      "grad_norm": 12.466361999511719,
+      "learning_rate": 0.00019244957473554323,
+      "loss": 0.0131,
+      "step": 313
+    },
+    {
+      "epoch": 0.2569820971867008,
+      "grad_norm": 1.225807547569275,
+      "learning_rate": 0.00019240029679221408,
+      "loss": 0.0215,
+      "step": 314
+    },
+    {
+      "epoch": 0.2578005115089514,
+      "grad_norm": 0.0476732961833477,
+      "learning_rate": 0.00019235086491582463,
+      "loss": 0.0017,
+      "step": 315
+    },
+    {
+      "epoch": 0.25861892583120205,
+      "grad_norm": 4.701657772064209,
+      "learning_rate": 0.0001923012791887253,
+      "loss": 0.0362,
+      "step": 316
+    },
+    {
+      "epoch": 0.2594373401534527,
+      "grad_norm": 18.314184188842773,
+      "learning_rate": 0.00019225153969352275,
+      "loss": 0.4503,
+      "step": 317
+    },
+    {
+      "epoch": 0.2602557544757033,
+      "grad_norm": 2.99334716796875,
+      "learning_rate": 0.00019220164651307986,
+      "loss": 0.0408,
+      "step": 318
+    },
+    {
+      "epoch": 0.261074168797954,
+      "grad_norm": 3.7102837562561035,
+      "learning_rate": 0.00019215159973051552,
+      "loss": 0.1288,
+      "step": 319
+    },
+    {
+      "epoch": 0.2618925831202046,
+      "grad_norm": 2.6803882122039795,
+      "learning_rate": 0.0001921013994292045,
+      "loss": 0.0465,
+      "step": 320
+    },
+    {
+      "epoch": 0.26271099744245524,
+      "grad_norm": 8.001237869262695,
+      "learning_rate": 0.00019205104569277733,
+      "loss": 0.0932,
+      "step": 321
+    },
+    {
+      "epoch": 0.2635294117647059,
+      "grad_norm": 0.7996736764907837,
+      "learning_rate": 0.00019200053860512014,
+      "loss": 0.0281,
+      "step": 322
+    },
+    {
+      "epoch": 0.2643478260869565,
+      "grad_norm": 0.10956323891878128,
+      "learning_rate": 0.0001919498782503746,
+      "loss": 0.0034,
+      "step": 323
+    },
+    {
+      "epoch": 0.26516624040920717,
+      "grad_norm": 10.366631507873535,
+      "learning_rate": 0.0001918990647129376,
+      "loss": 0.054,
+      "step": 324
+    },
+    {
+      "epoch": 0.2659846547314578,
+      "grad_norm": 16.79122543334961,
+      "learning_rate": 0.0001918480980774613,
+      "loss": 0.3747,
+      "step": 325
+    },
+    {
+      "epoch": 0.26680306905370843,
+      "grad_norm": 16.79612159729004,
+      "learning_rate": 0.00019179697842885293,
+      "loss": 0.1583,
+      "step": 326
+    },
+    {
+      "epoch": 0.2676214833759591,
+      "grad_norm": 0.3873645067214966,
+      "learning_rate": 0.0001917457058522746,
+      "loss": 0.0056,
+      "step": 327
+    },
+    {
+      "epoch": 0.2684398976982097,
+      "grad_norm": 1.963756799697876,
+      "learning_rate": 0.00019169428043314314,
+      "loss": 0.073,
+      "step": 328
+    },
+    {
+      "epoch": 0.26925831202046036,
+      "grad_norm": 9.859981536865234,
+      "learning_rate": 0.00019164270225713008,
+      "loss": 0.0644,
+      "step": 329
+    },
+    {
+      "epoch": 0.270076726342711,
+      "grad_norm": 3.775178909301758,
+      "learning_rate": 0.0001915909714101614,
+      "loss": 0.1595,
+      "step": 330
+    },
+    {
+      "epoch": 0.2708951406649616,
+      "grad_norm": 2.9009556770324707,
+      "learning_rate": 0.00019153908797841742,
+      "loss": 0.0377,
+      "step": 331
+    },
+    {
+      "epoch": 0.2717135549872123,
+      "grad_norm": 11.263628005981445,
+      "learning_rate": 0.0001914870520483327,
+      "loss": 0.061,
+      "step": 332
+    },
+    {
+      "epoch": 0.27253196930946294,
+      "grad_norm": 0.01919454149901867,
+      "learning_rate": 0.00019143486370659573,
+      "loss": 0.0008,
+      "step": 333
+    },
+    {
+      "epoch": 0.27335038363171354,
+      "grad_norm": 2.147348642349243,
+      "learning_rate": 0.00019138252304014907,
+      "loss": 0.2487,
+      "step": 334
+    },
+    {
+      "epoch": 0.2741687979539642,
+      "grad_norm": 0.05764702707529068,
+      "learning_rate": 0.0001913300301361889,
+      "loss": 0.0016,
+      "step": 335
+    },
+    {
+      "epoch": 0.2749872122762148,
+      "grad_norm": 0.025458738207817078,
+      "learning_rate": 0.00019127738508216516,
+      "loss": 0.0009,
+      "step": 336
+    },
+    {
+      "epoch": 0.27580562659846547,
+      "grad_norm": 2.147672653198242,
+      "learning_rate": 0.00019122458796578114,
+      "loss": 0.1389,
+      "step": 337
+    },
+    {
+      "epoch": 0.27662404092071613,
+      "grad_norm": 2.0472524166107178,
+      "learning_rate": 0.0001911716388749935,
+      "loss": 0.0667,
+      "step": 338
+    },
+    {
+      "epoch": 0.27744245524296673,
+      "grad_norm": 0.15356223285198212,
+      "learning_rate": 0.0001911185378980121,
+      "loss": 0.0025,
+      "step": 339
+    },
+    {
+      "epoch": 0.2782608695652174,
+      "grad_norm": 0.0540032684803009,
+      "learning_rate": 0.00019106528512329978,
+      "loss": 0.002,
+      "step": 340
+    },
+    {
+      "epoch": 0.27907928388746805,
+      "grad_norm": 3.762300968170166,
+      "learning_rate": 0.00019101188063957235,
+      "loss": 0.1637,
+      "step": 341
+    },
+    {
+      "epoch": 0.27989769820971866,
+      "grad_norm": 1.0720164775848389,
+      "learning_rate": 0.0001909583245357983,
+      "loss": 0.0124,
+      "step": 342
+    },
+    {
+      "epoch": 0.2807161125319693,
+      "grad_norm": 3.95150089263916,
+      "learning_rate": 0.00019090461690119868,
+      "loss": 0.021,
+      "step": 343
+    },
+    {
+      "epoch": 0.2815345268542199,
+      "grad_norm": 5.743108749389648,
+      "learning_rate": 0.00019085075782524703,
+      "loss": 0.1296,
+      "step": 344
+    },
+    {
+      "epoch": 0.2823529411764706,
+      "grad_norm": 1.5119516849517822,
+      "learning_rate": 0.00019079674739766917,
+      "loss": 0.0188,
+      "step": 345
+    },
+    {
+      "epoch": 0.28317135549872124,
+      "grad_norm": 0.20997177064418793,
+      "learning_rate": 0.0001907425857084431,
+      "loss": 0.0049,
+      "step": 346
+    },
+    {
+      "epoch": 0.28398976982097185,
+      "grad_norm": 3.7278308868408203,
+      "learning_rate": 0.00019068827284779874,
+      "loss": 0.1293,
+      "step": 347
+    },
+    {
+      "epoch": 0.2848081841432225,
+      "grad_norm": 3.974945545196533,
+      "learning_rate": 0.0001906338089062179,
+      "loss": 0.0574,
+      "step": 348
+    },
+    {
+      "epoch": 0.28562659846547317,
+      "grad_norm": 34.30356216430664,
+      "learning_rate": 0.00019057919397443403,
+      "loss": 0.8184,
+      "step": 349
+    },
+    {
+      "epoch": 0.2864450127877238,
+      "grad_norm": 0.13904468715190887,
+      "learning_rate": 0.0001905244281434322,
+      "loss": 0.0013,
+      "step": 350
+    },
+    {
+      "epoch": 0.28726342710997443,
+      "grad_norm": 1.397051215171814,
+      "learning_rate": 0.00019046951150444882,
+      "loss": 0.0348,
+      "step": 351
+    },
+    {
+      "epoch": 0.28808184143222504,
+      "grad_norm": 0.16247797012329102,
+      "learning_rate": 0.00019041444414897153,
+      "loss": 0.0031,
+      "step": 352
+    },
+    {
+      "epoch": 0.2889002557544757,
+      "grad_norm": 0.17214684188365936,
+      "learning_rate": 0.0001903592261687391,
+      "loss": 0.0038,
+      "step": 353
+    },
+    {
+      "epoch": 0.28971867007672636,
+      "grad_norm": 0.03287775442004204,
+      "learning_rate": 0.00019030385765574114,
+      "loss": 0.0008,
+      "step": 354
+    },
+    {
+      "epoch": 0.29053708439897696,
+      "grad_norm": 1.5617440938949585,
+      "learning_rate": 0.00019024833870221817,
+      "loss": 0.128,
+      "step": 355
+    },
+    {
+      "epoch": 0.2913554987212276,
+      "grad_norm": 0.4113420248031616,
+      "learning_rate": 0.00019019266940066117,
+      "loss": 0.0022,
+      "step": 356
+    },
+    {
+      "epoch": 0.2921739130434783,
+      "grad_norm": 9.879963874816895,
+      "learning_rate": 0.00019013684984381176,
+      "loss": 0.1339,
+      "step": 357
+    },
+    {
+      "epoch": 0.2929923273657289,
+      "grad_norm": 2.0313758850097656,
+      "learning_rate": 0.00019008088012466179,
+      "loss": 0.0137,
+      "step": 358
+    },
+    {
+      "epoch": 0.29381074168797955,
+      "grad_norm": 2.5029163360595703,
+      "learning_rate": 0.00019002476033645326,
+      "loss": 0.0366,
+      "step": 359
+    },
+    {
+      "epoch": 0.29462915601023015,
+      "grad_norm": 2.537306785583496,
+      "learning_rate": 0.00018996849057267815,
+      "loss": 0.0249,
+      "step": 360
+    },
+    {
+      "epoch": 0.2954475703324808,
+      "grad_norm": 0.12636743485927582,
+      "learning_rate": 0.0001899120709270784,
+      "loss": 0.0024,
+      "step": 361
+    },
+    {
+      "epoch": 0.2962659846547315,
+      "grad_norm": 2.858910083770752,
+      "learning_rate": 0.00018985550149364552,
+      "loss": 0.0341,
+      "step": 362
+    },
+    {
+      "epoch": 0.2970843989769821,
+      "grad_norm": 0.029364800080657005,
+      "learning_rate": 0.0001897987823666207,
+      "loss": 0.0006,
+      "step": 363
+    },
+    {
+      "epoch": 0.29790281329923274,
+      "grad_norm": 4.783391952514648,
+      "learning_rate": 0.00018974191364049434,
+      "loss": 0.242,
+      "step": 364
+    },
+    {
+      "epoch": 0.2987212276214834,
+      "grad_norm": 1.4791219234466553,
+      "learning_rate": 0.0001896848954100062,
+      "loss": 0.0586,
+      "step": 365
+    },
+    {
+      "epoch": 0.299539641943734,
+      "grad_norm": 7.496979713439941,
+      "learning_rate": 0.000189627727770145,
+      "loss": 0.0033,
+      "step": 366
+    },
+    {
+      "epoch": 0.30035805626598466,
+      "grad_norm": 2.8373289108276367,
+      "learning_rate": 0.00018957041081614845,
+      "loss": 0.0062,
+      "step": 367
+    },
+    {
+      "epoch": 0.30117647058823527,
+      "grad_norm": 0.04176490381360054,
+      "learning_rate": 0.000189512944643503,
+      "loss": 0.0006,
+      "step": 368
+    },
+    {
+      "epoch": 0.30199488491048593,
+      "grad_norm": 6.637153625488281,
+      "learning_rate": 0.00018945532934794363,
+      "loss": 0.0217,
+      "step": 369
+    },
+    {
+      "epoch": 0.3028132992327366,
+      "grad_norm": 5.947137355804443,
+      "learning_rate": 0.0001893975650254538,
+      "loss": 0.0624,
+      "step": 370
+    },
+    {
+      "epoch": 0.3036317135549872,
+      "grad_norm": 4.605132102966309,
+      "learning_rate": 0.0001893396517722652,
+      "loss": 0.0287,
+      "step": 371
+    },
+    {
+      "epoch": 0.30445012787723785,
+      "grad_norm": 0.5078949928283691,
+      "learning_rate": 0.00018928158968485769,
+      "loss": 0.005,
+      "step": 372
+    },
+    {
+      "epoch": 0.3052685421994885,
+      "grad_norm": 2.0189433097839355,
+      "learning_rate": 0.00018922337885995903,
+      "loss": 0.0471,
+      "step": 373
+    },
+    {
+      "epoch": 0.3060869565217391,
+      "grad_norm": 1.7046236991882324,
+      "learning_rate": 0.00018916501939454476,
+      "loss": 0.1305,
+      "step": 374
+    },
+    {
+      "epoch": 0.3069053708439898,
+      "grad_norm": 23.23792839050293,
+      "learning_rate": 0.00018910651138583808,
+      "loss": 0.1648,
+      "step": 375
+    },
+    {
+      "epoch": 0.3077237851662404,
+      "grad_norm": 10.033699989318848,
+      "learning_rate": 0.00018904785493130963,
+      "loss": 0.1637,
+      "step": 376
+    },
+    {
+      "epoch": 0.30854219948849104,
+      "grad_norm": 1.7353355884552002,
+      "learning_rate": 0.00018898905012867736,
+      "loss": 0.041,
+      "step": 377
+    },
+    {
+      "epoch": 0.3093606138107417,
+      "grad_norm": 3.0973310470581055,
+      "learning_rate": 0.00018893009707590636,
+      "loss": 0.1848,
+      "step": 378
+    },
+    {
+      "epoch": 0.3101790281329923,
+      "grad_norm": 6.835923194885254,
+      "learning_rate": 0.0001888709958712087,
+      "loss": 0.1367,
+      "step": 379
+    },
+    {
+      "epoch": 0.31099744245524297,
+      "grad_norm": 5.92802619934082,
+      "learning_rate": 0.00018881174661304327,
+      "loss": 0.0561,
+      "step": 380
+    },
+    {
+      "epoch": 0.3118158567774936,
+      "grad_norm": 2.9860589504241943,
+      "learning_rate": 0.00018875234940011557,
+      "loss": 0.106,
+      "step": 381
+    },
+    {
+      "epoch": 0.31263427109974423,
+      "grad_norm": 2.7648398876190186,
+      "learning_rate": 0.00018869280433137759,
+      "loss": 0.0348,
+      "step": 382
+    },
+    {
+      "epoch": 0.3134526854219949,
+      "grad_norm": 1.2207233905792236,
+      "learning_rate": 0.00018863311150602773,
+      "loss": 0.0328,
+      "step": 383
+    },
+    {
+      "epoch": 0.3142710997442455,
+      "grad_norm": 0.7907407879829407,
+      "learning_rate": 0.00018857327102351034,
+      "loss": 0.0191,
+      "step": 384
+    },
+    {
+      "epoch": 0.31508951406649616,
+      "grad_norm": 0.05348999425768852,
+      "learning_rate": 0.000188513282983516,
+      "loss": 0.0023,
+      "step": 385
+    },
+    {
+      "epoch": 0.3159079283887468,
+      "grad_norm": 1.7986161708831787,
+      "learning_rate": 0.00018845314748598094,
+      "loss": 0.0403,
+      "step": 386
+    },
+    {
+      "epoch": 0.3167263427109974,
+      "grad_norm": 0.08972010016441345,
+      "learning_rate": 0.00018839286463108717,
+      "loss": 0.0015,
+      "step": 387
+    },
+    {
+      "epoch": 0.3175447570332481,
+      "grad_norm": 2.7837443351745605,
+      "learning_rate": 0.000188332434519262,
+      "loss": 0.0452,
+      "step": 388
+    },
+    {
+      "epoch": 0.31836317135549874,
+      "grad_norm": 0.03810626640915871,
+      "learning_rate": 0.00018827185725117827,
+      "loss": 0.0005,
+      "step": 389
+    },
+    {
+      "epoch": 0.31918158567774935,
+      "grad_norm": 3.4926528930664062,
+      "learning_rate": 0.00018821113292775388,
+      "loss": 0.1284,
+      "step": 390
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 4.0867791175842285,
+      "learning_rate": 0.0001881502616501517,
+      "loss": 0.0975,
+      "step": 391
+    },
+    {
+      "epoch": 0.32081841432225067,
+      "grad_norm": 0.2339545637369156,
+      "learning_rate": 0.00018808924351977944,
+      "loss": 0.0018,
+      "step": 392
+    },
+    {
+      "epoch": 0.32163682864450127,
+      "grad_norm": 9.076642990112305,
+      "learning_rate": 0.00018802807863828945,
+      "loss": 0.1044,
+      "step": 393
+    },
+    {
+      "epoch": 0.32245524296675193,
+      "grad_norm": 11.172904014587402,
+      "learning_rate": 0.00018796676710757854,
+      "loss": 0.0998,
+      "step": 394
+    },
+    {
+      "epoch": 0.32327365728900254,
+      "grad_norm": 5.419793128967285,
+      "learning_rate": 0.00018790530902978788,
+      "loss": 0.0529,
+      "step": 395
+    },
+    {
+      "epoch": 0.3240920716112532,
+      "grad_norm": 0.1783570945262909,
+      "learning_rate": 0.00018784370450730274,
+      "loss": 0.0013,
+      "step": 396
+    },
+    {
+      "epoch": 0.32491048593350386,
+      "grad_norm": 2.391124725341797,
+      "learning_rate": 0.00018778195364275234,
+      "loss": 0.0167,
+      "step": 397
+    },
+    {
+      "epoch": 0.32572890025575446,
+      "grad_norm": 11.476174354553223,
+      "learning_rate": 0.00018772005653900977,
+      "loss": 0.0821,
+      "step": 398
+    },
+    {
+      "epoch": 0.3265473145780051,
+      "grad_norm": 4.880053997039795,
+      "learning_rate": 0.00018765801329919166,
+      "loss": 0.0475,
+      "step": 399
+    },
+    {
+      "epoch": 0.3273657289002558,
+      "grad_norm": 4.687880039215088,
+      "learning_rate": 0.00018759582402665814,
+      "loss": 0.3302,
+      "step": 400
+    },
+    {
+      "epoch": 0.3273657289002558,
+      "eval_loss": 0.01040154229849577,
+      "eval_runtime": 236.3558,
+      "eval_samples_per_second": 6.896,
+      "eval_steps_per_second": 1.726,
+      "step": 400
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.980132394795008e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null