romainnn committed
Commit 5020091 · verified · 1 Parent(s): aeaf645

Training in progress, step 200, checkpoint
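
The files changed below are the standard contents of a Hugging Face Trainer checkpoint for a PEFT/LoRA run: the adapter weights (adapter_model.safetensors), optimizer and LR-scheduler state, the RNG state, and trainer_state.json. As a rough, hypothetical sketch (the actual training script is not part of this commit), a configuration like the following would save and push such a checkpoint every 100 steps; only output_dir, the batch size, logging_steps, and eval_steps are values actually visible in the trainer_state.json diff further down.

# Hypothetical sketch of a Trainer configuration consistent with this checkpoint;
# the repository's real training script is not included in the commit.
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="miner_id_24",          # matches "best_model_checkpoint": "miner_id_24/checkpoint-200"
    per_device_train_batch_size=4,     # "train_batch_size": 4
    logging_steps=1,                   # "logging_steps": 1
    eval_strategy="steps",
    eval_steps=100,                    # "eval_steps": 100
    save_steps=100,                    # checkpoints at step 100, 200, ...
    load_best_model_at_end=True,       # keeps "best_metric" / "best_model_checkpoint" updated
    metric_for_best_model="eval_loss",
    push_to_hub=True,
    hub_strategy="checkpoint",         # pushes the last-checkpoint/ folder seen in this diff
)
# args would then be passed to Trainer(model=..., args=args, train_dataset=..., eval_dataset=...).
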

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a8de489ef3b321cb1ae9af64ca58c6dbec7a1d4128393e4857a2c221f6ea076a
+ oid sha256:19706cf0c3c9917c9184ea6dd896ae403cff3f60135017a3817469095368ba3d
  size 159967880
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9d3a26ad7e055007417a4ed97af3d431946811ebd851b248dab10f8f6205bc8a
+ oid sha256:599285b80cf8578bfb241dec5482dcbc643d2bbff8ed837d8b8bde37f4468e55
  size 81730196
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d0331b567cfc16b8ba9714182b6d1197f7e53379d529dce315a5110d4c146f3d
+ oid sha256:2c4c574b02d176cf65e372b568790452567f681562fac58ee8b8e4f350e30a8d
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c7d9ae41e6d249905bc3fac6a098c95704c803904081b2ec9bf85cc2bde9d9e0
+ oid sha256:b0bc2caca23d108aa0e5bff0f5c61c148bd9877bb3fac62655062c7ee8a3560e
  size 1064
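
Each of the four pointer diffs above swaps only the Git LFS object id (the SHA-256 of the new blob); the byte sizes are unchanged. A small sketch, assuming the files have been downloaded locally, for checking a file against the oid and size recorded in its pointer; the example values simply reuse the new scheduler.pt entry:

# Sketch: verify a downloaded file against the "oid sha256:..." and "size ..."
# fields of its Git LFS pointer. Assumes the checkpoint has been fetched locally.
import hashlib
import os

def matches_lfs_pointer(path, expected_oid, expected_size):
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            digest.update(chunk)
    return digest.hexdigest() == expected_oid and os.path.getsize(path) == expected_size

print(matches_lfs_pointer(
    "last-checkpoint/scheduler.pt",
    "b0bc2caca23d108aa0e5bff0f5c61c148bd9877bb3fac62655062c7ee8a3560e",
    1064,
))
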
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 1.2061141729354858,
- "best_model_checkpoint": "miner_id_24/checkpoint-100",
- "epoch": 0.006965000870625109,
+ "best_metric": 0.993212103843689,
+ "best_model_checkpoint": "miner_id_24/checkpoint-200",
+ "epoch": 0.013930001741250218,
  "eval_steps": 100,
- "global_step": 100,
+ "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -723,6 +723,714 @@
723
  "eval_samples_per_second": 7.146,
724
  "eval_steps_per_second": 1.786,
725
  "step": 100
726
+ },
727
+ {
728
+ "epoch": 0.00703465087933136,
729
+ "grad_norm": 0.7039173245429993,
730
+ "learning_rate": 0.00019556189188465702,
731
+ "loss": 1.4391,
732
+ "step": 101
733
+ },
734
+ {
735
+ "epoch": 0.007104300888037611,
736
+ "grad_norm": 0.8350788354873657,
737
+ "learning_rate": 0.00019546456321594376,
738
+ "loss": 1.1431,
739
+ "step": 102
740
+ },
741
+ {
742
+ "epoch": 0.0071739508967438624,
743
+ "grad_norm": 0.6535744667053223,
744
+ "learning_rate": 0.0001953662036253438,
745
+ "loss": 1.296,
746
+ "step": 103
747
+ },
748
+ {
749
+ "epoch": 0.0072436009054501135,
750
+ "grad_norm": 0.7496301531791687,
751
+ "learning_rate": 0.00019526681417504258,
752
+ "loss": 1.311,
753
+ "step": 104
754
+ },
755
+ {
756
+ "epoch": 0.007313250914156365,
757
+ "grad_norm": 0.7061691880226135,
758
+ "learning_rate": 0.0001951663959383468,
759
+ "loss": 1.3601,
760
+ "step": 105
761
+ },
762
+ {
763
+ "epoch": 0.007382900922862616,
764
+ "grad_norm": 0.8221380114555359,
765
+ "learning_rate": 0.00019506494999967298,
766
+ "loss": 1.3149,
767
+ "step": 106
768
+ },
769
+ {
770
+ "epoch": 0.007452550931568867,
771
+ "grad_norm": 0.9544386267662048,
772
+ "learning_rate": 0.000194962477454536,
773
+ "loss": 1.2967,
774
+ "step": 107
775
+ },
776
+ {
777
+ "epoch": 0.007522200940275118,
778
+ "grad_norm": 0.8127594590187073,
779
+ "learning_rate": 0.00019485897940953688,
780
+ "loss": 1.4015,
781
+ "step": 108
782
+ },
783
+ {
784
+ "epoch": 0.007591850948981369,
785
+ "grad_norm": 0.7376645803451538,
786
+ "learning_rate": 0.0001947544569823511,
787
+ "loss": 1.4958,
788
+ "step": 109
789
+ },
790
+ {
791
+ "epoch": 0.00766150095768762,
792
+ "grad_norm": 0.6602767705917358,
793
+ "learning_rate": 0.00019464891130171647,
794
+ "loss": 1.3593,
795
+ "step": 110
796
+ },
797
+ {
798
+ "epoch": 0.007731150966393871,
799
+ "grad_norm": 0.9318028092384338,
800
+ "learning_rate": 0.0001945423435074208,
801
+ "loss": 1.0125,
802
+ "step": 111
803
+ },
804
+ {
805
+ "epoch": 0.007800800975100122,
806
+ "grad_norm": 0.7048940062522888,
807
+ "learning_rate": 0.00019443475475028983,
808
+ "loss": 1.4342,
809
+ "step": 112
810
+ },
811
+ {
812
+ "epoch": 0.007870450983806372,
813
+ "grad_norm": 0.9778817892074585,
814
+ "learning_rate": 0.00019432614619217459,
815
+ "loss": 1.0368,
816
+ "step": 113
817
+ },
818
+ {
819
+ "epoch": 0.007940100992512624,
820
+ "grad_norm": 0.808047890663147,
821
+ "learning_rate": 0.000194216519005939,
822
+ "loss": 1.105,
823
+ "step": 114
824
+ },
825
+ {
826
+ "epoch": 0.008009751001218875,
827
+ "grad_norm": 0.7996501326560974,
828
+ "learning_rate": 0.0001941058743754471,
829
+ "loss": 1.1383,
830
+ "step": 115
831
+ },
832
+ {
833
+ "epoch": 0.008079401009925127,
834
+ "grad_norm": 1.0752230882644653,
835
+ "learning_rate": 0.00019399421349555035,
836
+ "loss": 1.3508,
837
+ "step": 116
838
+ },
839
+ {
840
+ "epoch": 0.008149051018631377,
841
+ "grad_norm": 0.7151166200637817,
842
+ "learning_rate": 0.00019388153757207471,
843
+ "loss": 1.4086,
844
+ "step": 117
845
+ },
846
+ {
847
+ "epoch": 0.008218701027337629,
848
+ "grad_norm": 0.7622511386871338,
849
+ "learning_rate": 0.00019376784782180746,
850
+ "loss": 1.1942,
851
+ "step": 118
852
+ },
853
+ {
854
+ "epoch": 0.008288351036043879,
855
+ "grad_norm": 0.6896407008171082,
856
+ "learning_rate": 0.0001936531454724844,
857
+ "loss": 1.2571,
858
+ "step": 119
859
+ },
860
+ {
861
+ "epoch": 0.008358001044750131,
862
+ "grad_norm": 0.7991106510162354,
863
+ "learning_rate": 0.00019353743176277622,
864
+ "loss": 1.2531,
865
+ "step": 120
866
+ },
867
+ {
868
+ "epoch": 0.008427651053456381,
869
+ "grad_norm": 0.8540248870849609,
870
+ "learning_rate": 0.00019342070794227536,
871
+ "loss": 1.223,
872
+ "step": 121
873
+ },
874
+ {
875
+ "epoch": 0.008497301062162633,
876
+ "grad_norm": 0.8329891562461853,
877
+ "learning_rate": 0.00019330297527148246,
878
+ "loss": 0.9099,
879
+ "step": 122
880
+ },
881
+ {
882
+ "epoch": 0.008566951070868883,
883
+ "grad_norm": 0.7838830351829529,
884
+ "learning_rate": 0.00019318423502179272,
885
+ "loss": 1.3098,
886
+ "step": 123
887
+ },
888
+ {
889
+ "epoch": 0.008636601079575135,
890
+ "grad_norm": 0.7665576338768005,
891
+ "learning_rate": 0.00019306448847548216,
892
+ "loss": 1.3633,
893
+ "step": 124
894
+ },
895
+ {
896
+ "epoch": 0.008706251088281386,
897
+ "grad_norm": 0.7157841324806213,
898
+ "learning_rate": 0.00019294373692569383,
899
+ "loss": 0.9222,
900
+ "step": 125
901
+ },
902
+ {
903
+ "epoch": 0.008775901096987638,
904
+ "grad_norm": 0.944957971572876,
905
+ "learning_rate": 0.0001928219816764238,
906
+ "loss": 1.0901,
907
+ "step": 126
908
+ },
909
+ {
910
+ "epoch": 0.008845551105693888,
911
+ "grad_norm": 0.636736273765564,
912
+ "learning_rate": 0.0001926992240425071,
913
+ "loss": 1.3484,
914
+ "step": 127
915
+ },
916
+ {
917
+ "epoch": 0.00891520111440014,
918
+ "grad_norm": 0.6209918260574341,
919
+ "learning_rate": 0.0001925754653496035,
920
+ "loss": 1.3551,
921
+ "step": 128
922
+ },
923
+ {
924
+ "epoch": 0.00898485112310639,
925
+ "grad_norm": 0.7056594491004944,
926
+ "learning_rate": 0.00019245070693418322,
927
+ "loss": 1.4229,
928
+ "step": 129
929
+ },
930
+ {
931
+ "epoch": 0.009054501131812642,
932
+ "grad_norm": 0.7279839515686035,
933
+ "learning_rate": 0.00019232495014351246,
934
+ "loss": 1.0699,
935
+ "step": 130
936
+ },
937
+ {
938
+ "epoch": 0.009124151140518892,
939
+ "grad_norm": 0.6324151754379272,
940
+ "learning_rate": 0.00019219819633563891,
941
+ "loss": 1.3833,
942
+ "step": 131
943
+ },
944
+ {
945
+ "epoch": 0.009193801149225144,
946
+ "grad_norm": 0.7449592351913452,
947
+ "learning_rate": 0.00019207044687937703,
948
+ "loss": 1.2067,
949
+ "step": 132
950
+ },
951
+ {
952
+ "epoch": 0.009263451157931394,
953
+ "grad_norm": 0.939274787902832,
954
+ "learning_rate": 0.0001919417031542933,
955
+ "loss": 1.3229,
956
+ "step": 133
957
+ },
958
+ {
959
+ "epoch": 0.009333101166637646,
960
+ "grad_norm": 0.8192336559295654,
961
+ "learning_rate": 0.00019181196655069127,
962
+ "loss": 1.1575,
963
+ "step": 134
964
+ },
965
+ {
966
+ "epoch": 0.009402751175343897,
967
+ "grad_norm": 0.7507984638214111,
968
+ "learning_rate": 0.00019168123846959666,
969
+ "loss": 1.0461,
970
+ "step": 135
971
+ },
972
+ {
973
+ "epoch": 0.009472401184050148,
974
+ "grad_norm": 0.6593666672706604,
975
+ "learning_rate": 0.00019154952032274206,
976
+ "loss": 1.3806,
977
+ "step": 136
978
+ },
979
+ {
980
+ "epoch": 0.009542051192756399,
981
+ "grad_norm": 0.6475424766540527,
982
+ "learning_rate": 0.00019141681353255184,
983
+ "loss": 0.9218,
984
+ "step": 137
985
+ },
986
+ {
987
+ "epoch": 0.00961170120146265,
988
+ "grad_norm": 0.7746126651763916,
989
+ "learning_rate": 0.00019128311953212678,
990
+ "loss": 0.8967,
991
+ "step": 138
992
+ },
993
+ {
994
+ "epoch": 0.009681351210168901,
995
+ "grad_norm": 0.7104780673980713,
996
+ "learning_rate": 0.00019114843976522842,
997
+ "loss": 1.1855,
998
+ "step": 139
999
+ },
1000
+ {
1001
+ "epoch": 0.009751001218875153,
1002
+ "grad_norm": 0.597457230091095,
1003
+ "learning_rate": 0.00019101277568626374,
1004
+ "loss": 1.0809,
1005
+ "step": 140
1006
+ },
1007
+ {
1008
+ "epoch": 0.009820651227581403,
1009
+ "grad_norm": 0.8071316480636597,
1010
+ "learning_rate": 0.00019087612876026908,
1011
+ "loss": 1.0129,
1012
+ "step": 141
1013
+ },
1014
+ {
1015
+ "epoch": 0.009890301236287655,
1016
+ "grad_norm": 0.8741605877876282,
1017
+ "learning_rate": 0.00019073850046289484,
1018
+ "loss": 0.8784,
1019
+ "step": 142
1020
+ },
1021
+ {
1022
+ "epoch": 0.009959951244993905,
1023
+ "grad_norm": 0.7503401637077332,
1024
+ "learning_rate": 0.00019059989228038902,
1025
+ "loss": 1.1498,
1026
+ "step": 143
1027
+ },
1028
+ {
1029
+ "epoch": 0.010029601253700157,
1030
+ "grad_norm": 0.7068141102790833,
1031
+ "learning_rate": 0.0001904603057095815,
1032
+ "loss": 1.2644,
1033
+ "step": 144
1034
+ },
1035
+ {
1036
+ "epoch": 0.010099251262406407,
1037
+ "grad_norm": 0.7954654097557068,
1038
+ "learning_rate": 0.0001903197422578678,
1039
+ "loss": 1.1108,
1040
+ "step": 145
1041
+ },
1042
+ {
1043
+ "epoch": 0.01016890127111266,
1044
+ "grad_norm": 0.7548302412033081,
1045
+ "learning_rate": 0.0001901782034431927,
1046
+ "loss": 0.9177,
1047
+ "step": 146
1048
+ },
1049
+ {
1050
+ "epoch": 0.01023855127981891,
1051
+ "grad_norm": 0.7617766261100769,
1052
+ "learning_rate": 0.00019003569079403395,
1053
+ "loss": 1.256,
1054
+ "step": 147
1055
+ },
1056
+ {
1057
+ "epoch": 0.010308201288525162,
1058
+ "grad_norm": 0.7205716967582703,
1059
+ "learning_rate": 0.00018989220584938573,
1060
+ "loss": 1.3767,
1061
+ "step": 148
1062
+ },
1063
+ {
1064
+ "epoch": 0.010377851297231412,
1065
+ "grad_norm": 0.6221201419830322,
1066
+ "learning_rate": 0.00018974775015874213,
1067
+ "loss": 1.3329,
1068
+ "step": 149
1069
+ },
1070
+ {
1071
+ "epoch": 0.010447501305937664,
1072
+ "grad_norm": 0.565428614616394,
1073
+ "learning_rate": 0.00018960232528208022,
1074
+ "loss": 1.1155,
1075
+ "step": 150
1076
+ },
1077
+ {
1078
+ "epoch": 0.010517151314643914,
1079
+ "grad_norm": 0.7672913074493408,
1080
+ "learning_rate": 0.00018945593278984333,
1081
+ "loss": 0.9654,
1082
+ "step": 151
1083
+ },
1084
+ {
1085
+ "epoch": 0.010586801323350166,
1086
+ "grad_norm": 0.737074077129364,
1087
+ "learning_rate": 0.00018930857426292412,
1088
+ "loss": 1.0644,
1089
+ "step": 152
1090
+ },
1091
+ {
1092
+ "epoch": 0.010656451332056416,
1093
+ "grad_norm": 0.6545393466949463,
1094
+ "learning_rate": 0.0001891602512926474,
1095
+ "loss": 1.2058,
1096
+ "step": 153
1097
+ },
1098
+ {
1099
+ "epoch": 0.010726101340762668,
1100
+ "grad_norm": 0.8019453287124634,
1101
+ "learning_rate": 0.00018901096548075305,
1102
+ "loss": 1.3134,
1103
+ "step": 154
1104
+ },
1105
+ {
1106
+ "epoch": 0.010795751349468918,
1107
+ "grad_norm": 0.8307440876960754,
1108
+ "learning_rate": 0.00018886071843937866,
1109
+ "loss": 1.152,
1110
+ "step": 155
1111
+ },
1112
+ {
1113
+ "epoch": 0.01086540135817517,
1114
+ "grad_norm": 0.8050329089164734,
1115
+ "learning_rate": 0.00018870951179104212,
1116
+ "loss": 0.9473,
1117
+ "step": 156
1118
+ },
1119
+ {
1120
+ "epoch": 0.01093505136688142,
1121
+ "grad_norm": 0.7510560154914856,
1122
+ "learning_rate": 0.00018855734716862417,
1123
+ "loss": 1.2265,
1124
+ "step": 157
1125
+ },
1126
+ {
1127
+ "epoch": 0.011004701375587672,
1128
+ "grad_norm": 0.7653977274894714,
1129
+ "learning_rate": 0.00018840422621535066,
1130
+ "loss": 1.3356,
1131
+ "step": 158
1132
+ },
1133
+ {
1134
+ "epoch": 0.011074351384293923,
1135
+ "grad_norm": 0.7661434412002563,
1136
+ "learning_rate": 0.00018825015058477481,
1137
+ "loss": 0.9601,
1138
+ "step": 159
1139
+ },
1140
+ {
1141
+ "epoch": 0.011144001393000175,
1142
+ "grad_norm": 0.7829368114471436,
1143
+ "learning_rate": 0.00018809512194075957,
1144
+ "loss": 1.0675,
1145
+ "step": 160
1146
+ },
1147
+ {
1148
+ "epoch": 0.011213651401706425,
1149
+ "grad_norm": 0.6673858761787415,
1150
+ "learning_rate": 0.00018793914195745933,
1151
+ "loss": 1.4312,
1152
+ "step": 161
1153
+ },
1154
+ {
1155
+ "epoch": 0.011283301410412677,
1156
+ "grad_norm": 0.8060672879219055,
1157
+ "learning_rate": 0.00018778221231930203,
1158
+ "loss": 1.0241,
1159
+ "step": 162
1160
+ },
1161
+ {
1162
+ "epoch": 0.011352951419118927,
1163
+ "grad_norm": 1.0137969255447388,
1164
+ "learning_rate": 0.00018762433472097097,
1165
+ "loss": 1.1867,
1166
+ "step": 163
1167
+ },
1168
+ {
1169
+ "epoch": 0.011422601427825179,
1170
+ "grad_norm": 0.9313655495643616,
1171
+ "learning_rate": 0.0001874655108673864,
1172
+ "loss": 1.3046,
1173
+ "step": 164
1174
+ },
1175
+ {
1176
+ "epoch": 0.01149225143653143,
1177
+ "grad_norm": 0.9493317008018494,
1178
+ "learning_rate": 0.00018730574247368732,
1179
+ "loss": 1.1123,
1180
+ "step": 165
1181
+ },
1182
+ {
1183
+ "epoch": 0.011561901445237681,
1184
+ "grad_norm": 0.8069944977760315,
1185
+ "learning_rate": 0.0001871450312652126,
1186
+ "loss": 1.0592,
1187
+ "step": 166
1188
+ },
1189
+ {
1190
+ "epoch": 0.011631551453943931,
1191
+ "grad_norm": 0.6559287905693054,
1192
+ "learning_rate": 0.00018698337897748283,
1193
+ "loss": 1.2388,
1194
+ "step": 167
1195
+ },
1196
+ {
1197
+ "epoch": 0.011701201462650183,
1198
+ "grad_norm": 0.650059700012207,
1199
+ "learning_rate": 0.0001868207873561811,
1200
+ "loss": 0.9891,
1201
+ "step": 168
1202
+ },
1203
+ {
1204
+ "epoch": 0.011770851471356434,
1205
+ "grad_norm": 0.6247674822807312,
1206
+ "learning_rate": 0.00018665725815713443,
1207
+ "loss": 1.2925,
1208
+ "step": 169
1209
+ },
1210
+ {
1211
+ "epoch": 0.011840501480062686,
1212
+ "grad_norm": 0.7453685402870178,
1213
+ "learning_rate": 0.00018649279314629483,
1214
+ "loss": 1.06,
1215
+ "step": 170
1216
+ },
1217
+ {
1218
+ "epoch": 0.011910151488768936,
1219
+ "grad_norm": 0.826835572719574,
1220
+ "learning_rate": 0.00018632739409972003,
1221
+ "loss": 0.9637,
1222
+ "step": 171
1223
+ },
1224
+ {
1225
+ "epoch": 0.011979801497475188,
1226
+ "grad_norm": 0.7538785338401794,
1227
+ "learning_rate": 0.00018616106280355444,
1228
+ "loss": 1.0126,
1229
+ "step": 172
1230
+ },
1231
+ {
1232
+ "epoch": 0.012049451506181438,
1233
+ "grad_norm": 0.8348299264907837,
1234
+ "learning_rate": 0.00018599380105400982,
1235
+ "loss": 0.988,
1236
+ "step": 173
1237
+ },
1238
+ {
1239
+ "epoch": 0.01211910151488769,
1240
+ "grad_norm": 0.8298357725143433,
1241
+ "learning_rate": 0.00018582561065734604,
1242
+ "loss": 1.0608,
1243
+ "step": 174
1244
+ },
1245
+ {
1246
+ "epoch": 0.01218875152359394,
1247
+ "grad_norm": 0.6961440443992615,
1248
+ "learning_rate": 0.00018565649342985118,
1249
+ "loss": 1.1564,
1250
+ "step": 175
1251
+ },
1252
+ {
1253
+ "epoch": 0.012258401532300192,
1254
+ "grad_norm": 0.664256751537323,
1255
+ "learning_rate": 0.00018548645119782238,
1256
+ "loss": 1.1865,
1257
+ "step": 176
1258
+ },
1259
+ {
1260
+ "epoch": 0.012328051541006442,
1261
+ "grad_norm": 0.7857444882392883,
1262
+ "learning_rate": 0.0001853154857975458,
1263
+ "loss": 0.9903,
1264
+ "step": 177
1265
+ },
1266
+ {
1267
+ "epoch": 0.012397701549712694,
1268
+ "grad_norm": 0.758602499961853,
1269
+ "learning_rate": 0.0001851435990752769,
1270
+ "loss": 1.3456,
1271
+ "step": 178
1272
+ },
1273
+ {
1274
+ "epoch": 0.012467351558418945,
1275
+ "grad_norm": 0.768666684627533,
1276
+ "learning_rate": 0.0001849707928872206,
1277
+ "loss": 0.9773,
1278
+ "step": 179
1279
+ },
1280
+ {
1281
+ "epoch": 0.012537001567125197,
1282
+ "grad_norm": 0.8674852848052979,
1283
+ "learning_rate": 0.00018479706909951094,
1284
+ "loss": 1.0203,
1285
+ "step": 180
1286
+ },
1287
+ {
1288
+ "epoch": 0.012606651575831447,
1289
+ "grad_norm": 0.6384921669960022,
1290
+ "learning_rate": 0.0001846224295881913,
1291
+ "loss": 1.1004,
1292
+ "step": 181
1293
+ },
1294
+ {
1295
+ "epoch": 0.012676301584537699,
1296
+ "grad_norm": 0.6848528981208801,
1297
+ "learning_rate": 0.00018444687623919386,
1298
+ "loss": 1.0699,
1299
+ "step": 182
1300
+ },
1301
+ {
1302
+ "epoch": 0.012745951593243949,
1303
+ "grad_norm": 0.6943731307983398,
1304
+ "learning_rate": 0.00018427041094831937,
1305
+ "loss": 1.1812,
1306
+ "step": 183
1307
+ },
1308
+ {
1309
+ "epoch": 0.012815601601950201,
1310
+ "grad_norm": 1.0284762382507324,
1311
+ "learning_rate": 0.00018409303562121662,
1312
+ "loss": 1.1307,
1313
+ "step": 184
1314
+ },
1315
+ {
1316
+ "epoch": 0.012885251610656451,
1317
+ "grad_norm": 0.7977420091629028,
1318
+ "learning_rate": 0.00018391475217336193,
1319
+ "loss": 1.0772,
1320
+ "step": 185
1321
+ },
1322
+ {
1323
+ "epoch": 0.012954901619362703,
1324
+ "grad_norm": 0.678799569606781,
1325
+ "learning_rate": 0.0001837355625300383,
1326
+ "loss": 1.1816,
1327
+ "step": 186
1328
+ },
1329
+ {
1330
+ "epoch": 0.013024551628068953,
1331
+ "grad_norm": 0.7933035492897034,
1332
+ "learning_rate": 0.00018355546862631493,
1333
+ "loss": 1.2014,
1334
+ "step": 187
1335
+ },
1336
+ {
1337
+ "epoch": 0.013094201636775205,
1338
+ "grad_norm": 0.7373278737068176,
1339
+ "learning_rate": 0.00018337447240702594,
1340
+ "loss": 0.9163,
1341
+ "step": 188
1342
+ },
1343
+ {
1344
+ "epoch": 0.013163851645481455,
1345
+ "grad_norm": 0.7306934595108032,
1346
+ "learning_rate": 0.00018319257582674964,
1347
+ "loss": 0.8467,
1348
+ "step": 189
1349
+ },
1350
+ {
1351
+ "epoch": 0.013233501654187707,
1352
+ "grad_norm": 0.6722437739372253,
1353
+ "learning_rate": 0.00018300978084978735,
1354
+ "loss": 1.1145,
1355
+ "step": 190
1356
+ },
1357
+ {
1358
+ "epoch": 0.013303151662893958,
1359
+ "grad_norm": 0.8375574350357056,
1360
+ "learning_rate": 0.00018282608945014217,
1361
+ "loss": 0.8763,
1362
+ "step": 191
1363
+ },
1364
+ {
1365
+ "epoch": 0.01337280167160021,
1366
+ "grad_norm": 0.6876571774482727,
1367
+ "learning_rate": 0.0001826415036114976,
1368
+ "loss": 1.3694,
1369
+ "step": 192
1370
+ },
1371
+ {
1372
+ "epoch": 0.01344245168030646,
1373
+ "grad_norm": 0.5936222076416016,
1374
+ "learning_rate": 0.0001824560253271963,
1375
+ "loss": 1.4071,
1376
+ "step": 193
1377
+ },
1378
+ {
1379
+ "epoch": 0.013512101689012712,
1380
+ "grad_norm": 0.6679614782333374,
1381
+ "learning_rate": 0.00018226965660021836,
1382
+ "loss": 0.8098,
1383
+ "step": 194
1384
+ },
1385
+ {
1386
+ "epoch": 0.013581751697718962,
1387
+ "grad_norm": 0.8226193189620972,
1388
+ "learning_rate": 0.00018208239944315978,
1389
+ "loss": 0.6594,
1390
+ "step": 195
1391
+ },
1392
+ {
1393
+ "epoch": 0.013651401706425214,
1394
+ "grad_norm": 0.8376763463020325,
1395
+ "learning_rate": 0.0001818942558782108,
1396
+ "loss": 1.0417,
1397
+ "step": 196
1398
+ },
1399
+ {
1400
+ "epoch": 0.013721051715131464,
1401
+ "grad_norm": 0.773747444152832,
1402
+ "learning_rate": 0.00018170522793713387,
1403
+ "loss": 0.7496,
1404
+ "step": 197
1405
+ },
1406
+ {
1407
+ "epoch": 0.013790701723837716,
1408
+ "grad_norm": 0.8213014006614685,
1409
+ "learning_rate": 0.00018151531766124186,
1410
+ "loss": 0.842,
1411
+ "step": 198
1412
+ },
1413
+ {
1414
+ "epoch": 0.013860351732543966,
1415
+ "grad_norm": 0.6993326544761658,
1416
+ "learning_rate": 0.000181324527101376,
1417
+ "loss": 1.1651,
1418
+ "step": 199
1419
+ },
1420
+ {
1421
+ "epoch": 0.013930001741250218,
1422
+ "grad_norm": 0.550957977771759,
1423
+ "learning_rate": 0.00018113285831788365,
1424
+ "loss": 1.2762,
1425
+ "step": 200
1426
+ },
1427
+ {
1428
+ "epoch": 0.013930001741250218,
1429
+ "eval_loss": 0.993212103843689,
1430
+ "eval_runtime": 699.7494,
1431
+ "eval_samples_per_second": 7.145,
1432
+ "eval_steps_per_second": 1.786,
1433
+ "step": 200
1434
  }
1435
  ],
1436
  "logging_steps": 1,
 
@@ -751,7 +1459,7 @@
  "attributes": {}
  }
  },
- "total_flos": 2.7358071474880512e+17,
+ "total_flos": 5.4518373758376346e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
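
The trainer_state.json diff records the run advancing from global step 100 to 200: the per-step log entries for steps 101-200 plus the step-200 eval record are appended, the eval loss improves from 1.2061 to 0.9932, checkpoint-200 becomes the best checkpoint, and total_flos roughly doubles. A short sketch, assuming a local copy of the checkpoint, for reading those fields back:

# Sketch: inspect the fields updated in this commit from a local copy of the
# checkpoint's trainer_state.json; the path is a placeholder for wherever it was downloaded.
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print("global_step:", state["global_step"])                # 200 after this commit
print("best_metric:", state["best_metric"])                # 0.9932... (eval_loss at step 200)
print("best checkpoint:", state["best_model_checkpoint"])  # miner_id_24/checkpoint-200

# The per-step records appended in this commit live in the log_history list.
last = state["log_history"][-1]
print("last logged step:", last["step"], "eval_loss:", last.get("eval_loss"))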