{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.5446997489862908, "eval_steps": 500, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00019308746862328635, "grad_norm": 1.4612300395965576, "learning_rate": 9.652509652509653e-08, "loss": 1.1998, "step": 1 }, { "epoch": 0.0003861749372465727, "grad_norm": 1.780540108680725, "learning_rate": 1.9305019305019306e-07, "loss": 1.1459, "step": 2 }, { "epoch": 0.0005792624058698591, "grad_norm": 1.652919054031372, "learning_rate": 2.8957528957528957e-07, "loss": 1.2813, "step": 3 }, { "epoch": 0.0007723498744931454, "grad_norm": 1.6971830129623413, "learning_rate": 3.8610038610038613e-07, "loss": 1.1878, "step": 4 }, { "epoch": 0.0009654373431164317, "grad_norm": 1.8193821907043457, "learning_rate": 4.826254826254826e-07, "loss": 1.1911, "step": 5 }, { "epoch": 0.0011585248117397182, "grad_norm": 1.5276634693145752, "learning_rate": 5.791505791505791e-07, "loss": 1.248, "step": 6 }, { "epoch": 0.0013516122803630044, "grad_norm": 1.681528091430664, "learning_rate": 6.756756756756758e-07, "loss": 1.2605, "step": 7 }, { "epoch": 0.0015446997489862908, "grad_norm": 3.0537164211273193, "learning_rate": 7.722007722007723e-07, "loss": 1.11, "step": 8 }, { "epoch": 0.0017377872176095772, "grad_norm": 1.4215664863586426, "learning_rate": 8.687258687258688e-07, "loss": 1.1758, "step": 9 }, { "epoch": 0.0019308746862328635, "grad_norm": 1.8343621492385864, "learning_rate": 9.652509652509653e-07, "loss": 1.1539, "step": 10 }, { "epoch": 0.0021239621548561497, "grad_norm": 1.6930686235427856, "learning_rate": 1.0617760617760618e-06, "loss": 1.3129, "step": 11 }, { "epoch": 0.0023170496234794363, "grad_norm": 1.5111931562423706, "learning_rate": 1.1583011583011583e-06, "loss": 1.2012, "step": 12 }, { "epoch": 0.0025101370921027225, "grad_norm": 1.7829928398132324, "learning_rate": 1.2548262548262548e-06, "loss": 1.2668, "step": 13 }, { "epoch": 0.0027032245607260088, "grad_norm": 2.1506600379943848, "learning_rate": 1.3513513513513515e-06, "loss": 1.1913, "step": 14 }, { "epoch": 0.0028963120293492954, "grad_norm": 1.5398775339126587, "learning_rate": 1.447876447876448e-06, "loss": 1.1639, "step": 15 }, { "epoch": 0.0030893994979725816, "grad_norm": 3.18918514251709, "learning_rate": 1.5444015444015445e-06, "loss": 1.1737, "step": 16 }, { "epoch": 0.003282486966595868, "grad_norm": 1.2985707521438599, "learning_rate": 1.6409266409266408e-06, "loss": 1.2231, "step": 17 }, { "epoch": 0.0034755744352191545, "grad_norm": 1.4303349256515503, "learning_rate": 1.7374517374517375e-06, "loss": 1.2371, "step": 18 }, { "epoch": 0.0036686619038424407, "grad_norm": 1.403638243675232, "learning_rate": 1.8339768339768342e-06, "loss": 1.188, "step": 19 }, { "epoch": 0.003861749372465727, "grad_norm": 1.367430329322815, "learning_rate": 1.9305019305019305e-06, "loss": 1.2084, "step": 20 }, { "epoch": 0.004054836841089014, "grad_norm": 1.6066632270812988, "learning_rate": 2.0270270270270273e-06, "loss": 1.1736, "step": 21 }, { "epoch": 0.004247924309712299, "grad_norm": 1.5477393865585327, "learning_rate": 2.1235521235521236e-06, "loss": 1.1437, "step": 22 }, { "epoch": 0.004441011778335586, "grad_norm": 1.9692046642303467, "learning_rate": 2.2200772200772203e-06, "loss": 1.1076, "step": 23 }, { "epoch": 0.004634099246958873, "grad_norm": 7.645205974578857, "learning_rate": 2.3166023166023166e-06, "loss": 1.194, "step": 24 }, { "epoch": 0.0048271867155821584, "grad_norm": 1.2169207334518433, "learning_rate": 2.4131274131274133e-06, "loss": 1.1925, "step": 25 }, { "epoch": 0.005020274184205445, "grad_norm": 2.3346774578094482, "learning_rate": 2.5096525096525096e-06, "loss": 1.1937, "step": 26 }, { "epoch": 0.005213361652828732, "grad_norm": 1.227017879486084, "learning_rate": 2.6061776061776063e-06, "loss": 1.1894, "step": 27 }, { "epoch": 0.0054064491214520175, "grad_norm": 2.095817804336548, "learning_rate": 2.702702702702703e-06, "loss": 1.1093, "step": 28 }, { "epoch": 0.005599536590075304, "grad_norm": 1.2119330167770386, "learning_rate": 2.7992277992277993e-06, "loss": 1.2529, "step": 29 }, { "epoch": 0.005792624058698591, "grad_norm": 1.4704550504684448, "learning_rate": 2.895752895752896e-06, "loss": 1.2464, "step": 30 }, { "epoch": 0.005985711527321877, "grad_norm": 1.3670967817306519, "learning_rate": 2.9922779922779923e-06, "loss": 1.2063, "step": 31 }, { "epoch": 0.006178798995945163, "grad_norm": 1.954966425895691, "learning_rate": 3.088803088803089e-06, "loss": 1.1252, "step": 32 }, { "epoch": 0.00637188646456845, "grad_norm": 1.4206795692443848, "learning_rate": 3.1853281853281853e-06, "loss": 1.1495, "step": 33 }, { "epoch": 0.006564973933191736, "grad_norm": 2.3426260948181152, "learning_rate": 3.2818532818532816e-06, "loss": 1.162, "step": 34 }, { "epoch": 0.006758061401815022, "grad_norm": 1.114580512046814, "learning_rate": 3.3783783783783788e-06, "loss": 1.0837, "step": 35 }, { "epoch": 0.006951148870438309, "grad_norm": 0.8600135445594788, "learning_rate": 3.474903474903475e-06, "loss": 1.0954, "step": 36 }, { "epoch": 0.007144236339061595, "grad_norm": 1.0426526069641113, "learning_rate": 3.5714285714285714e-06, "loss": 1.1341, "step": 37 }, { "epoch": 0.007337323807684881, "grad_norm": 1.0329654216766357, "learning_rate": 3.6679536679536685e-06, "loss": 1.0353, "step": 38 }, { "epoch": 0.007530411276308167, "grad_norm": 0.8607264161109924, "learning_rate": 3.764478764478765e-06, "loss": 1.1059, "step": 39 }, { "epoch": 0.007723498744931454, "grad_norm": 1.1297675371170044, "learning_rate": 3.861003861003861e-06, "loss": 1.1423, "step": 40 }, { "epoch": 0.00791658621355474, "grad_norm": 1.7621365785598755, "learning_rate": 3.957528957528957e-06, "loss": 1.1105, "step": 41 }, { "epoch": 0.008109673682178027, "grad_norm": 1.6920156478881836, "learning_rate": 4.0540540540540545e-06, "loss": 1.0729, "step": 42 }, { "epoch": 0.008302761150801313, "grad_norm": 0.9799310564994812, "learning_rate": 4.150579150579151e-06, "loss": 1.0144, "step": 43 }, { "epoch": 0.008495848619424599, "grad_norm": 1.6059881448745728, "learning_rate": 4.247104247104247e-06, "loss": 1.1367, "step": 44 }, { "epoch": 0.008688936088047886, "grad_norm": 0.9817798137664795, "learning_rate": 4.343629343629344e-06, "loss": 1.0622, "step": 45 }, { "epoch": 0.008882023556671172, "grad_norm": 1.0513640642166138, "learning_rate": 4.4401544401544405e-06, "loss": 1.0266, "step": 46 }, { "epoch": 0.009075111025294458, "grad_norm": 0.8522196412086487, "learning_rate": 4.536679536679537e-06, "loss": 1.029, "step": 47 }, { "epoch": 0.009268198493917745, "grad_norm": 0.9537653923034668, "learning_rate": 4.633204633204633e-06, "loss": 1.0477, "step": 48 }, { "epoch": 0.009461285962541031, "grad_norm": 0.798719048500061, "learning_rate": 4.72972972972973e-06, "loss": 1.0113, "step": 49 }, { "epoch": 0.009654373431164317, "grad_norm": 0.8048179149627686, "learning_rate": 4.8262548262548266e-06, "loss": 0.9908, "step": 50 }, { "epoch": 0.009847460899787604, "grad_norm": 1.0210130214691162, "learning_rate": 4.922779922779923e-06, "loss": 1.123, "step": 51 }, { "epoch": 0.01004054836841089, "grad_norm": 1.168614387512207, "learning_rate": 5.019305019305019e-06, "loss": 1.0614, "step": 52 }, { "epoch": 0.010233635837034176, "grad_norm": 0.8250200152397156, "learning_rate": 5.115830115830116e-06, "loss": 0.9906, "step": 53 }, { "epoch": 0.010426723305657463, "grad_norm": 0.9950800538063049, "learning_rate": 5.212355212355213e-06, "loss": 1.0561, "step": 54 }, { "epoch": 0.01061981077428075, "grad_norm": 0.7707552909851074, "learning_rate": 5.308880308880309e-06, "loss": 0.9695, "step": 55 }, { "epoch": 0.010812898242904035, "grad_norm": 0.8436341881752014, "learning_rate": 5.405405405405406e-06, "loss": 1.0073, "step": 56 }, { "epoch": 0.011005985711527323, "grad_norm": 1.718087077140808, "learning_rate": 5.501930501930502e-06, "loss": 1.0004, "step": 57 }, { "epoch": 0.011199073180150608, "grad_norm": 0.924625039100647, "learning_rate": 5.598455598455599e-06, "loss": 1.0242, "step": 58 }, { "epoch": 0.011392160648773894, "grad_norm": 0.9548898339271545, "learning_rate": 5.694980694980695e-06, "loss": 1.0324, "step": 59 }, { "epoch": 0.011585248117397182, "grad_norm": 0.8355027437210083, "learning_rate": 5.791505791505792e-06, "loss": 0.9832, "step": 60 }, { "epoch": 0.011778335586020467, "grad_norm": 2.3416240215301514, "learning_rate": 5.888030888030888e-06, "loss": 0.9665, "step": 61 }, { "epoch": 0.011971423054643753, "grad_norm": 0.7659398317337036, "learning_rate": 5.984555984555985e-06, "loss": 1.0076, "step": 62 }, { "epoch": 0.01216451052326704, "grad_norm": 1.1783533096313477, "learning_rate": 6.081081081081082e-06, "loss": 0.9744, "step": 63 }, { "epoch": 0.012357597991890326, "grad_norm": 0.6449043154716492, "learning_rate": 6.177606177606178e-06, "loss": 1.0114, "step": 64 }, { "epoch": 0.012550685460513612, "grad_norm": 0.6536839604377747, "learning_rate": 6.274131274131274e-06, "loss": 1.0093, "step": 65 }, { "epoch": 0.0127437729291369, "grad_norm": 0.6473376750946045, "learning_rate": 6.370656370656371e-06, "loss": 0.9758, "step": 66 }, { "epoch": 0.012936860397760186, "grad_norm": 1.144728660583496, "learning_rate": 6.467181467181467e-06, "loss": 1.0581, "step": 67 }, { "epoch": 0.013129947866383471, "grad_norm": 0.776213526725769, "learning_rate": 6.563706563706563e-06, "loss": 1.0117, "step": 68 }, { "epoch": 0.013323035335006759, "grad_norm": 0.923336386680603, "learning_rate": 6.660231660231661e-06, "loss": 0.9205, "step": 69 }, { "epoch": 0.013516122803630045, "grad_norm": 0.7814953923225403, "learning_rate": 6.7567567567567575e-06, "loss": 0.9443, "step": 70 }, { "epoch": 0.01370921027225333, "grad_norm": 0.8537019491195679, "learning_rate": 6.853281853281854e-06, "loss": 0.9241, "step": 71 }, { "epoch": 0.013902297740876618, "grad_norm": 1.1301255226135254, "learning_rate": 6.94980694980695e-06, "loss": 0.9852, "step": 72 }, { "epoch": 0.014095385209499904, "grad_norm": 0.8625062108039856, "learning_rate": 7.046332046332046e-06, "loss": 1.0692, "step": 73 }, { "epoch": 0.01428847267812319, "grad_norm": 0.6089375019073486, "learning_rate": 7.142857142857143e-06, "loss": 0.9844, "step": 74 }, { "epoch": 0.014481560146746475, "grad_norm": 0.7637853622436523, "learning_rate": 7.239382239382239e-06, "loss": 0.9289, "step": 75 }, { "epoch": 0.014674647615369763, "grad_norm": 1.192014217376709, "learning_rate": 7.335907335907337e-06, "loss": 0.9673, "step": 76 }, { "epoch": 0.014867735083993049, "grad_norm": 0.6673039197921753, "learning_rate": 7.432432432432433e-06, "loss": 0.9401, "step": 77 }, { "epoch": 0.015060822552616334, "grad_norm": 0.9406108856201172, "learning_rate": 7.52895752895753e-06, "loss": 0.9704, "step": 78 }, { "epoch": 0.015253910021239622, "grad_norm": 0.8270223736763, "learning_rate": 7.625482625482626e-06, "loss": 1.02, "step": 79 }, { "epoch": 0.015446997489862908, "grad_norm": 1.5373871326446533, "learning_rate": 7.722007722007722e-06, "loss": 0.961, "step": 80 }, { "epoch": 0.015640084958486195, "grad_norm": 0.6919928193092346, "learning_rate": 7.818532818532818e-06, "loss": 0.952, "step": 81 }, { "epoch": 0.01583317242710948, "grad_norm": 0.8759837746620178, "learning_rate": 7.915057915057915e-06, "loss": 0.9392, "step": 82 }, { "epoch": 0.016026259895732767, "grad_norm": 0.5847708582878113, "learning_rate": 8.011583011583013e-06, "loss": 0.9503, "step": 83 }, { "epoch": 0.016219347364356054, "grad_norm": 0.5833951234817505, "learning_rate": 8.108108108108109e-06, "loss": 0.8532, "step": 84 }, { "epoch": 0.01641243483297934, "grad_norm": 0.5780584216117859, "learning_rate": 8.204633204633205e-06, "loss": 0.9027, "step": 85 }, { "epoch": 0.016605522301602626, "grad_norm": 1.1678887605667114, "learning_rate": 8.301158301158302e-06, "loss": 0.9455, "step": 86 }, { "epoch": 0.016798609770225913, "grad_norm": 0.6515048742294312, "learning_rate": 8.397683397683398e-06, "loss": 0.9671, "step": 87 }, { "epoch": 0.016991697238849197, "grad_norm": 0.859980046749115, "learning_rate": 8.494208494208494e-06, "loss": 0.8954, "step": 88 }, { "epoch": 0.017184784707472485, "grad_norm": 1.1579340696334839, "learning_rate": 8.59073359073359e-06, "loss": 0.9858, "step": 89 }, { "epoch": 0.017377872176095772, "grad_norm": 0.6616291999816895, "learning_rate": 8.687258687258689e-06, "loss": 0.9725, "step": 90 }, { "epoch": 0.017570959644719056, "grad_norm": 10.529030799865723, "learning_rate": 8.783783783783785e-06, "loss": 1.0123, "step": 91 }, { "epoch": 0.017764047113342344, "grad_norm": 0.9118268489837646, "learning_rate": 8.880308880308881e-06, "loss": 0.9465, "step": 92 }, { "epoch": 0.01795713458196563, "grad_norm": 0.6226589679718018, "learning_rate": 8.976833976833977e-06, "loss": 0.9824, "step": 93 }, { "epoch": 0.018150222050588916, "grad_norm": 0.6660608649253845, "learning_rate": 9.073359073359074e-06, "loss": 0.894, "step": 94 }, { "epoch": 0.018343309519212203, "grad_norm": 0.7921581268310547, "learning_rate": 9.16988416988417e-06, "loss": 0.8626, "step": 95 }, { "epoch": 0.01853639698783549, "grad_norm": 1.1241141557693481, "learning_rate": 9.266409266409266e-06, "loss": 0.9245, "step": 96 }, { "epoch": 0.018729484456458775, "grad_norm": 0.6690508127212524, "learning_rate": 9.362934362934363e-06, "loss": 0.9049, "step": 97 }, { "epoch": 0.018922571925082062, "grad_norm": 0.5370510220527649, "learning_rate": 9.45945945945946e-06, "loss": 0.9876, "step": 98 }, { "epoch": 0.01911565939370535, "grad_norm": 0.7587014436721802, "learning_rate": 9.555984555984557e-06, "loss": 0.8376, "step": 99 }, { "epoch": 0.019308746862328634, "grad_norm": 0.787258505821228, "learning_rate": 9.652509652509653e-06, "loss": 0.9162, "step": 100 }, { "epoch": 0.01950183433095192, "grad_norm": 0.6462342143058777, "learning_rate": 9.74903474903475e-06, "loss": 0.9084, "step": 101 }, { "epoch": 0.01969492179957521, "grad_norm": 1.359196424484253, "learning_rate": 9.845559845559846e-06, "loss": 0.9014, "step": 102 }, { "epoch": 0.019888009268198493, "grad_norm": 0.6819143295288086, "learning_rate": 9.942084942084942e-06, "loss": 0.9198, "step": 103 }, { "epoch": 0.02008109673682178, "grad_norm": 1.050294041633606, "learning_rate": 1.0038610038610038e-05, "loss": 0.9768, "step": 104 }, { "epoch": 0.020274184205445068, "grad_norm": 1.0483160018920898, "learning_rate": 1.0135135135135136e-05, "loss": 0.9432, "step": 105 }, { "epoch": 0.020467271674068352, "grad_norm": 0.9592136144638062, "learning_rate": 1.0231660231660233e-05, "loss": 0.9374, "step": 106 }, { "epoch": 0.02066035914269164, "grad_norm": 0.8910746574401855, "learning_rate": 1.0328185328185329e-05, "loss": 0.8857, "step": 107 }, { "epoch": 0.020853446611314927, "grad_norm": 1.112828254699707, "learning_rate": 1.0424710424710425e-05, "loss": 0.8872, "step": 108 }, { "epoch": 0.02104653407993821, "grad_norm": 2.2271130084991455, "learning_rate": 1.0521235521235521e-05, "loss": 0.9841, "step": 109 }, { "epoch": 0.0212396215485615, "grad_norm": 0.7750412225723267, "learning_rate": 1.0617760617760618e-05, "loss": 0.9488, "step": 110 }, { "epoch": 0.021432709017184786, "grad_norm": 1.4586290121078491, "learning_rate": 1.0714285714285714e-05, "loss": 0.9122, "step": 111 }, { "epoch": 0.02162579648580807, "grad_norm": 3.518751621246338, "learning_rate": 1.0810810810810812e-05, "loss": 0.8748, "step": 112 }, { "epoch": 0.021818883954431358, "grad_norm": 0.6215397715568542, "learning_rate": 1.0907335907335908e-05, "loss": 0.8718, "step": 113 }, { "epoch": 0.022011971423054645, "grad_norm": 0.5350390076637268, "learning_rate": 1.1003861003861005e-05, "loss": 0.8796, "step": 114 }, { "epoch": 0.02220505889167793, "grad_norm": 0.7262043952941895, "learning_rate": 1.1100386100386101e-05, "loss": 0.8909, "step": 115 }, { "epoch": 0.022398146360301217, "grad_norm": 0.6588122844696045, "learning_rate": 1.1196911196911197e-05, "loss": 0.916, "step": 116 }, { "epoch": 0.022591233828924504, "grad_norm": 0.5475935339927673, "learning_rate": 1.1293436293436294e-05, "loss": 0.9513, "step": 117 }, { "epoch": 0.022784321297547788, "grad_norm": 0.6837526559829712, "learning_rate": 1.138996138996139e-05, "loss": 0.853, "step": 118 }, { "epoch": 0.022977408766171076, "grad_norm": 0.6509975790977478, "learning_rate": 1.1486486486486488e-05, "loss": 0.9087, "step": 119 }, { "epoch": 0.023170496234794363, "grad_norm": 0.6011126637458801, "learning_rate": 1.1583011583011584e-05, "loss": 0.9511, "step": 120 }, { "epoch": 0.023363583703417647, "grad_norm": 0.6402811408042908, "learning_rate": 1.167953667953668e-05, "loss": 0.9197, "step": 121 }, { "epoch": 0.023556671172040935, "grad_norm": 1.090778112411499, "learning_rate": 1.1776061776061777e-05, "loss": 0.8824, "step": 122 }, { "epoch": 0.023749758640664222, "grad_norm": 0.7196224331855774, "learning_rate": 1.1872586872586873e-05, "loss": 0.8805, "step": 123 }, { "epoch": 0.023942846109287506, "grad_norm": 1.3266594409942627, "learning_rate": 1.196911196911197e-05, "loss": 0.9055, "step": 124 }, { "epoch": 0.024135933577910794, "grad_norm": 0.6252379417419434, "learning_rate": 1.2065637065637066e-05, "loss": 0.9236, "step": 125 }, { "epoch": 0.02432902104653408, "grad_norm": 0.6404649019241333, "learning_rate": 1.2162162162162164e-05, "loss": 0.8588, "step": 126 }, { "epoch": 0.024522108515157365, "grad_norm": 1.0160189867019653, "learning_rate": 1.225868725868726e-05, "loss": 0.8787, "step": 127 }, { "epoch": 0.024715195983780653, "grad_norm": 0.7175147533416748, "learning_rate": 1.2355212355212356e-05, "loss": 0.8452, "step": 128 }, { "epoch": 0.02490828345240394, "grad_norm": 0.6418002247810364, "learning_rate": 1.2451737451737452e-05, "loss": 0.8968, "step": 129 }, { "epoch": 0.025101370921027225, "grad_norm": 0.6741524934768677, "learning_rate": 1.2548262548262549e-05, "loss": 0.884, "step": 130 }, { "epoch": 0.025294458389650512, "grad_norm": 1.026123285293579, "learning_rate": 1.2644787644787645e-05, "loss": 0.9733, "step": 131 }, { "epoch": 0.0254875458582738, "grad_norm": 0.7351776361465454, "learning_rate": 1.2741312741312741e-05, "loss": 0.9063, "step": 132 }, { "epoch": 0.025680633326897084, "grad_norm": 0.6679985523223877, "learning_rate": 1.2837837837837838e-05, "loss": 0.8813, "step": 133 }, { "epoch": 0.02587372079552037, "grad_norm": 0.7213040590286255, "learning_rate": 1.2934362934362934e-05, "loss": 0.8784, "step": 134 }, { "epoch": 0.02606680826414366, "grad_norm": 0.7113951444625854, "learning_rate": 1.303088803088803e-05, "loss": 0.862, "step": 135 }, { "epoch": 0.026259895732766943, "grad_norm": 0.731896698474884, "learning_rate": 1.3127413127413127e-05, "loss": 0.8814, "step": 136 }, { "epoch": 0.02645298320139023, "grad_norm": 0.7651190161705017, "learning_rate": 1.3223938223938226e-05, "loss": 0.887, "step": 137 }, { "epoch": 0.026646070670013518, "grad_norm": 0.7790947556495667, "learning_rate": 1.3320463320463322e-05, "loss": 0.921, "step": 138 }, { "epoch": 0.026839158138636802, "grad_norm": 0.8202561736106873, "learning_rate": 1.3416988416988419e-05, "loss": 0.8155, "step": 139 }, { "epoch": 0.02703224560726009, "grad_norm": 0.8106942772865295, "learning_rate": 1.3513513513513515e-05, "loss": 0.8637, "step": 140 }, { "epoch": 0.027225333075883377, "grad_norm": 0.5389417409896851, "learning_rate": 1.3610038610038611e-05, "loss": 0.9057, "step": 141 }, { "epoch": 0.02741842054450666, "grad_norm": 2.385514497756958, "learning_rate": 1.3706563706563708e-05, "loss": 0.8275, "step": 142 }, { "epoch": 0.02761150801312995, "grad_norm": 0.6623079776763916, "learning_rate": 1.3803088803088804e-05, "loss": 0.849, "step": 143 }, { "epoch": 0.027804595481753236, "grad_norm": 0.5735406875610352, "learning_rate": 1.38996138996139e-05, "loss": 0.7837, "step": 144 }, { "epoch": 0.02799768295037652, "grad_norm": 0.6306900978088379, "learning_rate": 1.3996138996138997e-05, "loss": 0.8044, "step": 145 }, { "epoch": 0.028190770418999807, "grad_norm": 1.527969479560852, "learning_rate": 1.4092664092664093e-05, "loss": 0.8475, "step": 146 }, { "epoch": 0.02838385788762309, "grad_norm": 0.7558512091636658, "learning_rate": 1.4189189189189189e-05, "loss": 0.8543, "step": 147 }, { "epoch": 0.02857694535624638, "grad_norm": 0.6967760920524597, "learning_rate": 1.4285714285714285e-05, "loss": 0.8803, "step": 148 }, { "epoch": 0.028770032824869667, "grad_norm": 0.5811948776245117, "learning_rate": 1.4382239382239382e-05, "loss": 0.8041, "step": 149 }, { "epoch": 0.02896312029349295, "grad_norm": 0.6023984551429749, "learning_rate": 1.4478764478764478e-05, "loss": 0.8453, "step": 150 }, { "epoch": 0.029156207762116238, "grad_norm": 0.7352141737937927, "learning_rate": 1.4575289575289574e-05, "loss": 0.78, "step": 151 }, { "epoch": 0.029349295230739526, "grad_norm": 1.1121176481246948, "learning_rate": 1.4671814671814674e-05, "loss": 0.8278, "step": 152 }, { "epoch": 0.02954238269936281, "grad_norm": 0.6076955199241638, "learning_rate": 1.476833976833977e-05, "loss": 0.9364, "step": 153 }, { "epoch": 0.029735470167986097, "grad_norm": 0.7215421199798584, "learning_rate": 1.4864864864864867e-05, "loss": 0.9179, "step": 154 }, { "epoch": 0.029928557636609385, "grad_norm": 0.7499887347221375, "learning_rate": 1.4961389961389963e-05, "loss": 0.9292, "step": 155 }, { "epoch": 0.03012164510523267, "grad_norm": 0.607318103313446, "learning_rate": 1.505791505791506e-05, "loss": 0.8233, "step": 156 }, { "epoch": 0.030314732573855956, "grad_norm": 0.6225395202636719, "learning_rate": 1.5154440154440155e-05, "loss": 0.8501, "step": 157 }, { "epoch": 0.030507820042479244, "grad_norm": 0.6800539493560791, "learning_rate": 1.5250965250965252e-05, "loss": 0.8173, "step": 158 }, { "epoch": 0.030700907511102528, "grad_norm": 0.6629009246826172, "learning_rate": 1.5347490347490348e-05, "loss": 0.852, "step": 159 }, { "epoch": 0.030893994979725815, "grad_norm": 0.6399476528167725, "learning_rate": 1.5444015444015444e-05, "loss": 0.9058, "step": 160 }, { "epoch": 0.031087082448349103, "grad_norm": 0.5240445137023926, "learning_rate": 1.554054054054054e-05, "loss": 0.848, "step": 161 }, { "epoch": 0.03128016991697239, "grad_norm": 1.4689232110977173, "learning_rate": 1.5637065637065637e-05, "loss": 0.8326, "step": 162 }, { "epoch": 0.03147325738559568, "grad_norm": 1.172482967376709, "learning_rate": 1.5733590733590733e-05, "loss": 0.7925, "step": 163 }, { "epoch": 0.03166634485421896, "grad_norm": 0.5028665661811829, "learning_rate": 1.583011583011583e-05, "loss": 0.7768, "step": 164 }, { "epoch": 0.031859432322842246, "grad_norm": 0.9853022694587708, "learning_rate": 1.5926640926640926e-05, "loss": 0.8934, "step": 165 }, { "epoch": 0.032052519791465534, "grad_norm": 0.9261530637741089, "learning_rate": 1.6023166023166026e-05, "loss": 0.7627, "step": 166 }, { "epoch": 0.03224560726008882, "grad_norm": 1.7357052564620972, "learning_rate": 1.6119691119691122e-05, "loss": 0.8436, "step": 167 }, { "epoch": 0.03243869472871211, "grad_norm": 0.8483394384384155, "learning_rate": 1.6216216216216218e-05, "loss": 0.8438, "step": 168 }, { "epoch": 0.032631782197335396, "grad_norm": 0.837378978729248, "learning_rate": 1.6312741312741314e-05, "loss": 0.8473, "step": 169 }, { "epoch": 0.03282486966595868, "grad_norm": 0.548952043056488, "learning_rate": 1.640926640926641e-05, "loss": 0.8468, "step": 170 }, { "epoch": 0.033017957134581964, "grad_norm": 0.8100823760032654, "learning_rate": 1.6505791505791507e-05, "loss": 0.8608, "step": 171 }, { "epoch": 0.03321104460320525, "grad_norm": 0.7120270133018494, "learning_rate": 1.6602316602316603e-05, "loss": 0.9042, "step": 172 }, { "epoch": 0.03340413207182854, "grad_norm": 0.8838785290718079, "learning_rate": 1.66988416988417e-05, "loss": 0.8227, "step": 173 }, { "epoch": 0.03359721954045183, "grad_norm": 1.1956639289855957, "learning_rate": 1.6795366795366796e-05, "loss": 0.8783, "step": 174 }, { "epoch": 0.033790307009075114, "grad_norm": 0.6007021069526672, "learning_rate": 1.6891891891891892e-05, "loss": 0.8252, "step": 175 }, { "epoch": 0.033983394477698395, "grad_norm": 0.5708699822425842, "learning_rate": 1.698841698841699e-05, "loss": 0.8543, "step": 176 }, { "epoch": 0.03417648194632168, "grad_norm": 0.9188433885574341, "learning_rate": 1.7084942084942085e-05, "loss": 0.8155, "step": 177 }, { "epoch": 0.03436956941494497, "grad_norm": 0.6138718128204346, "learning_rate": 1.718146718146718e-05, "loss": 0.8098, "step": 178 }, { "epoch": 0.03456265688356826, "grad_norm": 1.0639415979385376, "learning_rate": 1.7277992277992277e-05, "loss": 0.8868, "step": 179 }, { "epoch": 0.034755744352191545, "grad_norm": 0.5995820760726929, "learning_rate": 1.7374517374517377e-05, "loss": 0.8831, "step": 180 }, { "epoch": 0.03494883182081483, "grad_norm": 0.6621062159538269, "learning_rate": 1.7471042471042473e-05, "loss": 0.8224, "step": 181 }, { "epoch": 0.03514191928943811, "grad_norm": 0.9162173271179199, "learning_rate": 1.756756756756757e-05, "loss": 0.7921, "step": 182 }, { "epoch": 0.0353350067580614, "grad_norm": 5.1173505783081055, "learning_rate": 1.7664092664092666e-05, "loss": 0.8216, "step": 183 }, { "epoch": 0.03552809422668469, "grad_norm": 0.5913546085357666, "learning_rate": 1.7760617760617762e-05, "loss": 0.849, "step": 184 }, { "epoch": 0.035721181695307976, "grad_norm": 2.5334718227386475, "learning_rate": 1.785714285714286e-05, "loss": 0.8331, "step": 185 }, { "epoch": 0.03591426916393126, "grad_norm": 0.7598941922187805, "learning_rate": 1.7953667953667955e-05, "loss": 0.8272, "step": 186 }, { "epoch": 0.03610735663255455, "grad_norm": 0.7474730610847473, "learning_rate": 1.805019305019305e-05, "loss": 0.8048, "step": 187 }, { "epoch": 0.03630044410117783, "grad_norm": 2.7953217029571533, "learning_rate": 1.8146718146718147e-05, "loss": 0.8451, "step": 188 }, { "epoch": 0.03649353156980112, "grad_norm": 0.6779459714889526, "learning_rate": 1.8243243243243244e-05, "loss": 0.7735, "step": 189 }, { "epoch": 0.036686619038424406, "grad_norm": 0.6318115592002869, "learning_rate": 1.833976833976834e-05, "loss": 0.7858, "step": 190 }, { "epoch": 0.036879706507047694, "grad_norm": 0.7573837041854858, "learning_rate": 1.8436293436293436e-05, "loss": 0.8586, "step": 191 }, { "epoch": 0.03707279397567098, "grad_norm": 0.6934481263160706, "learning_rate": 1.8532818532818533e-05, "loss": 0.7874, "step": 192 }, { "epoch": 0.03726588144429427, "grad_norm": 1.4233558177947998, "learning_rate": 1.862934362934363e-05, "loss": 0.8125, "step": 193 }, { "epoch": 0.03745896891291755, "grad_norm": 0.6558254361152649, "learning_rate": 1.8725868725868725e-05, "loss": 0.8604, "step": 194 }, { "epoch": 0.03765205638154084, "grad_norm": 0.5977687835693359, "learning_rate": 1.8822393822393825e-05, "loss": 0.8385, "step": 195 }, { "epoch": 0.037845143850164124, "grad_norm": 0.8655466437339783, "learning_rate": 1.891891891891892e-05, "loss": 0.7591, "step": 196 }, { "epoch": 0.03803823131878741, "grad_norm": 0.5936390161514282, "learning_rate": 1.9015444015444017e-05, "loss": 0.7964, "step": 197 }, { "epoch": 0.0382313187874107, "grad_norm": 0.8284990787506104, "learning_rate": 1.9111969111969114e-05, "loss": 0.889, "step": 198 }, { "epoch": 0.03842440625603398, "grad_norm": 4.39487886428833, "learning_rate": 1.920849420849421e-05, "loss": 0.8066, "step": 199 }, { "epoch": 0.03861749372465727, "grad_norm": 0.9131956100463867, "learning_rate": 1.9305019305019306e-05, "loss": 0.8662, "step": 200 }, { "epoch": 0.038810581193280555, "grad_norm": 1.0108263492584229, "learning_rate": 1.9401544401544403e-05, "loss": 0.8148, "step": 201 }, { "epoch": 0.03900366866190384, "grad_norm": 0.780716598033905, "learning_rate": 1.94980694980695e-05, "loss": 0.8098, "step": 202 }, { "epoch": 0.03919675613052713, "grad_norm": 1.1168111562728882, "learning_rate": 1.9594594594594595e-05, "loss": 0.8124, "step": 203 }, { "epoch": 0.03938984359915042, "grad_norm": 0.7475872039794922, "learning_rate": 1.969111969111969e-05, "loss": 0.8128, "step": 204 }, { "epoch": 0.0395829310677737, "grad_norm": 1.0382946729660034, "learning_rate": 1.9787644787644788e-05, "loss": 0.8171, "step": 205 }, { "epoch": 0.039776018536396986, "grad_norm": 0.7898650169372559, "learning_rate": 1.9884169884169884e-05, "loss": 0.8313, "step": 206 }, { "epoch": 0.03996910600502027, "grad_norm": 0.6575832962989807, "learning_rate": 1.998069498069498e-05, "loss": 0.7551, "step": 207 }, { "epoch": 0.04016219347364356, "grad_norm": 0.9001128077507019, "learning_rate": 2.0077220077220077e-05, "loss": 0.793, "step": 208 }, { "epoch": 0.04035528094226685, "grad_norm": 0.7719081044197083, "learning_rate": 2.0173745173745176e-05, "loss": 0.766, "step": 209 }, { "epoch": 0.040548368410890136, "grad_norm": 0.6570479869842529, "learning_rate": 2.0270270270270273e-05, "loss": 0.817, "step": 210 }, { "epoch": 0.040741455879513416, "grad_norm": 0.6649913191795349, "learning_rate": 2.036679536679537e-05, "loss": 0.7894, "step": 211 }, { "epoch": 0.040934543348136704, "grad_norm": 1.1608506441116333, "learning_rate": 2.0463320463320465e-05, "loss": 0.9046, "step": 212 }, { "epoch": 0.04112763081675999, "grad_norm": 0.6409295201301575, "learning_rate": 2.055984555984556e-05, "loss": 0.8317, "step": 213 }, { "epoch": 0.04132071828538328, "grad_norm": 0.9779261946678162, "learning_rate": 2.0656370656370658e-05, "loss": 0.7851, "step": 214 }, { "epoch": 0.041513805754006566, "grad_norm": 0.6397377252578735, "learning_rate": 2.0752895752895754e-05, "loss": 0.7405, "step": 215 }, { "epoch": 0.041706893222629854, "grad_norm": 0.805662214756012, "learning_rate": 2.084942084942085e-05, "loss": 0.7688, "step": 216 }, { "epoch": 0.041899980691253134, "grad_norm": 0.7562344670295715, "learning_rate": 2.0945945945945947e-05, "loss": 0.8274, "step": 217 }, { "epoch": 0.04209306815987642, "grad_norm": 0.6757524013519287, "learning_rate": 2.1042471042471043e-05, "loss": 0.8036, "step": 218 }, { "epoch": 0.04228615562849971, "grad_norm": 0.6101422905921936, "learning_rate": 2.113899613899614e-05, "loss": 0.8288, "step": 219 }, { "epoch": 0.042479243097123, "grad_norm": 1.8889611959457397, "learning_rate": 2.1235521235521236e-05, "loss": 0.7842, "step": 220 }, { "epoch": 0.042672330565746285, "grad_norm": 1.1263983249664307, "learning_rate": 2.1332046332046332e-05, "loss": 0.7342, "step": 221 }, { "epoch": 0.04286541803436957, "grad_norm": 1.0929330587387085, "learning_rate": 2.1428571428571428e-05, "loss": 0.8605, "step": 222 }, { "epoch": 0.04305850550299285, "grad_norm": 0.7305883169174194, "learning_rate": 2.1525096525096524e-05, "loss": 0.7797, "step": 223 }, { "epoch": 0.04325159297161614, "grad_norm": 1.0881516933441162, "learning_rate": 2.1621621621621624e-05, "loss": 0.7513, "step": 224 }, { "epoch": 0.04344468044023943, "grad_norm": 0.7797479629516602, "learning_rate": 2.171814671814672e-05, "loss": 0.7143, "step": 225 }, { "epoch": 0.043637767908862715, "grad_norm": 1.1314635276794434, "learning_rate": 2.1814671814671817e-05, "loss": 0.748, "step": 226 }, { "epoch": 0.043830855377486, "grad_norm": 0.7506555318832397, "learning_rate": 2.1911196911196913e-05, "loss": 0.7791, "step": 227 }, { "epoch": 0.04402394284610929, "grad_norm": 0.8900340795516968, "learning_rate": 2.200772200772201e-05, "loss": 0.7894, "step": 228 }, { "epoch": 0.04421703031473257, "grad_norm": 0.5918322801589966, "learning_rate": 2.2104247104247106e-05, "loss": 0.8291, "step": 229 }, { "epoch": 0.04441011778335586, "grad_norm": 1.1213475465774536, "learning_rate": 2.2200772200772202e-05, "loss": 0.8107, "step": 230 }, { "epoch": 0.044603205251979146, "grad_norm": 0.6655052900314331, "learning_rate": 2.2297297297297298e-05, "loss": 0.8319, "step": 231 }, { "epoch": 0.04479629272060243, "grad_norm": 0.9097509980201721, "learning_rate": 2.2393822393822394e-05, "loss": 0.7379, "step": 232 }, { "epoch": 0.04498938018922572, "grad_norm": 0.7087088823318481, "learning_rate": 2.249034749034749e-05, "loss": 0.7936, "step": 233 }, { "epoch": 0.04518246765784901, "grad_norm": 1.281230092048645, "learning_rate": 2.2586872586872587e-05, "loss": 0.7881, "step": 234 }, { "epoch": 0.04537555512647229, "grad_norm": 0.7143283486366272, "learning_rate": 2.2683397683397683e-05, "loss": 0.8421, "step": 235 }, { "epoch": 0.045568642595095576, "grad_norm": 0.8116339445114136, "learning_rate": 2.277992277992278e-05, "loss": 0.8175, "step": 236 }, { "epoch": 0.045761730063718864, "grad_norm": 0.7786933779716492, "learning_rate": 2.2876447876447876e-05, "loss": 0.7971, "step": 237 }, { "epoch": 0.04595481753234215, "grad_norm": 0.7483916282653809, "learning_rate": 2.2972972972972976e-05, "loss": 0.824, "step": 238 }, { "epoch": 0.04614790500096544, "grad_norm": 0.7526113986968994, "learning_rate": 2.3069498069498072e-05, "loss": 0.8029, "step": 239 }, { "epoch": 0.04634099246958873, "grad_norm": 0.7758153676986694, "learning_rate": 2.3166023166023168e-05, "loss": 0.7468, "step": 240 }, { "epoch": 0.04653407993821201, "grad_norm": 0.8226968050003052, "learning_rate": 2.3262548262548265e-05, "loss": 0.797, "step": 241 }, { "epoch": 0.046727167406835295, "grad_norm": 0.6875340938568115, "learning_rate": 2.335907335907336e-05, "loss": 0.8675, "step": 242 }, { "epoch": 0.04692025487545858, "grad_norm": 0.9105044603347778, "learning_rate": 2.3455598455598457e-05, "loss": 0.7543, "step": 243 }, { "epoch": 0.04711334234408187, "grad_norm": 7.299250602722168, "learning_rate": 2.3552123552123553e-05, "loss": 0.7765, "step": 244 }, { "epoch": 0.04730642981270516, "grad_norm": 0.8509519100189209, "learning_rate": 2.364864864864865e-05, "loss": 0.7867, "step": 245 }, { "epoch": 0.047499517281328445, "grad_norm": 0.64810711145401, "learning_rate": 2.3745173745173746e-05, "loss": 0.807, "step": 246 }, { "epoch": 0.047692604749951725, "grad_norm": 0.8290013670921326, "learning_rate": 2.3841698841698842e-05, "loss": 0.8588, "step": 247 }, { "epoch": 0.04788569221857501, "grad_norm": 0.851947546005249, "learning_rate": 2.393822393822394e-05, "loss": 0.7589, "step": 248 }, { "epoch": 0.0480787796871983, "grad_norm": 0.8205694556236267, "learning_rate": 2.4034749034749035e-05, "loss": 0.8355, "step": 249 }, { "epoch": 0.04827186715582159, "grad_norm": 1.130135416984558, "learning_rate": 2.413127413127413e-05, "loss": 0.7436, "step": 250 }, { "epoch": 0.048464954624444875, "grad_norm": 0.6749304533004761, "learning_rate": 2.4227799227799227e-05, "loss": 0.8492, "step": 251 }, { "epoch": 0.04865804209306816, "grad_norm": 1.0572521686553955, "learning_rate": 2.4324324324324327e-05, "loss": 0.8466, "step": 252 }, { "epoch": 0.04885112956169144, "grad_norm": 0.8953571915626526, "learning_rate": 2.4420849420849423e-05, "loss": 0.8056, "step": 253 }, { "epoch": 0.04904421703031473, "grad_norm": 0.863225519657135, "learning_rate": 2.451737451737452e-05, "loss": 0.8192, "step": 254 }, { "epoch": 0.04923730449893802, "grad_norm": 0.8658750653266907, "learning_rate": 2.4613899613899616e-05, "loss": 0.7722, "step": 255 }, { "epoch": 0.049430391967561306, "grad_norm": 0.9667727947235107, "learning_rate": 2.4710424710424712e-05, "loss": 0.7625, "step": 256 }, { "epoch": 0.049623479436184594, "grad_norm": 0.7106841206550598, "learning_rate": 2.480694980694981e-05, "loss": 0.766, "step": 257 }, { "epoch": 0.04981656690480788, "grad_norm": 0.7659792900085449, "learning_rate": 2.4903474903474905e-05, "loss": 0.8137, "step": 258 }, { "epoch": 0.05000965437343116, "grad_norm": 1.0975064039230347, "learning_rate": 2.5e-05, "loss": 0.8745, "step": 259 }, { "epoch": 0.05020274184205445, "grad_norm": 0.7798373699188232, "learning_rate": 2.5096525096525097e-05, "loss": 0.7453, "step": 260 }, { "epoch": 0.05039582931067774, "grad_norm": 1.3059476613998413, "learning_rate": 2.5193050193050194e-05, "loss": 0.7905, "step": 261 }, { "epoch": 0.050588916779301024, "grad_norm": 0.8273429870605469, "learning_rate": 2.528957528957529e-05, "loss": 0.8137, "step": 262 }, { "epoch": 0.05078200424792431, "grad_norm": 1.0106010437011719, "learning_rate": 2.5386100386100386e-05, "loss": 0.7845, "step": 263 }, { "epoch": 0.0509750917165476, "grad_norm": 0.6267977356910706, "learning_rate": 2.5482625482625483e-05, "loss": 0.8179, "step": 264 }, { "epoch": 0.05116817918517088, "grad_norm": 0.6841793060302734, "learning_rate": 2.557915057915058e-05, "loss": 0.742, "step": 265 }, { "epoch": 0.05136126665379417, "grad_norm": 1.0548688173294067, "learning_rate": 2.5675675675675675e-05, "loss": 0.7493, "step": 266 }, { "epoch": 0.051554354122417455, "grad_norm": 0.9038710594177246, "learning_rate": 2.577220077220077e-05, "loss": 0.7675, "step": 267 }, { "epoch": 0.05174744159104074, "grad_norm": 0.6843962073326111, "learning_rate": 2.5868725868725868e-05, "loss": 0.6471, "step": 268 }, { "epoch": 0.05194052905966403, "grad_norm": 0.8368706107139587, "learning_rate": 2.5965250965250964e-05, "loss": 0.858, "step": 269 }, { "epoch": 0.05213361652828732, "grad_norm": 0.8310431241989136, "learning_rate": 2.606177606177606e-05, "loss": 0.7891, "step": 270 }, { "epoch": 0.0523267039969106, "grad_norm": 0.874569296836853, "learning_rate": 2.6158301158301157e-05, "loss": 0.7632, "step": 271 }, { "epoch": 0.052519791465533885, "grad_norm": 0.8221341371536255, "learning_rate": 2.6254826254826253e-05, "loss": 0.7618, "step": 272 }, { "epoch": 0.05271287893415717, "grad_norm": 0.8894712924957275, "learning_rate": 2.635135135135135e-05, "loss": 0.7819, "step": 273 }, { "epoch": 0.05290596640278046, "grad_norm": 0.6545665264129639, "learning_rate": 2.6447876447876452e-05, "loss": 0.7796, "step": 274 }, { "epoch": 0.05309905387140375, "grad_norm": 1.2378829717636108, "learning_rate": 2.654440154440155e-05, "loss": 0.7671, "step": 275 }, { "epoch": 0.053292141340027036, "grad_norm": 1.003044605255127, "learning_rate": 2.6640926640926645e-05, "loss": 0.7691, "step": 276 }, { "epoch": 0.053485228808650316, "grad_norm": 0.6534599661827087, "learning_rate": 2.673745173745174e-05, "loss": 0.8002, "step": 277 }, { "epoch": 0.053678316277273604, "grad_norm": 0.9556663632392883, "learning_rate": 2.6833976833976838e-05, "loss": 0.7924, "step": 278 }, { "epoch": 0.05387140374589689, "grad_norm": 1.0294713973999023, "learning_rate": 2.6930501930501934e-05, "loss": 0.7482, "step": 279 }, { "epoch": 0.05406449121452018, "grad_norm": 0.5893766283988953, "learning_rate": 2.702702702702703e-05, "loss": 0.7519, "step": 280 }, { "epoch": 0.054257578683143466, "grad_norm": 0.8069927096366882, "learning_rate": 2.7123552123552126e-05, "loss": 0.7413, "step": 281 }, { "epoch": 0.054450666151766754, "grad_norm": 0.7717078924179077, "learning_rate": 2.7220077220077223e-05, "loss": 0.7398, "step": 282 }, { "epoch": 0.054643753620390034, "grad_norm": 0.9112987518310547, "learning_rate": 2.731660231660232e-05, "loss": 0.8279, "step": 283 }, { "epoch": 0.05483684108901332, "grad_norm": 0.9186720848083496, "learning_rate": 2.7413127413127415e-05, "loss": 0.7987, "step": 284 }, { "epoch": 0.05502992855763661, "grad_norm": 0.9338028430938721, "learning_rate": 2.750965250965251e-05, "loss": 0.7699, "step": 285 }, { "epoch": 0.0552230160262599, "grad_norm": 0.709706723690033, "learning_rate": 2.7606177606177608e-05, "loss": 0.7654, "step": 286 }, { "epoch": 0.055416103494883184, "grad_norm": 1.1294058561325073, "learning_rate": 2.7702702702702704e-05, "loss": 0.7901, "step": 287 }, { "epoch": 0.05560919096350647, "grad_norm": 0.9902518391609192, "learning_rate": 2.77992277992278e-05, "loss": 0.7801, "step": 288 }, { "epoch": 0.05580227843212975, "grad_norm": 0.6824255585670471, "learning_rate": 2.7895752895752897e-05, "loss": 0.7717, "step": 289 }, { "epoch": 0.05599536590075304, "grad_norm": 1.3504962921142578, "learning_rate": 2.7992277992277993e-05, "loss": 0.7874, "step": 290 }, { "epoch": 0.05618845336937633, "grad_norm": 0.8294904828071594, "learning_rate": 2.808880308880309e-05, "loss": 0.818, "step": 291 }, { "epoch": 0.056381540837999615, "grad_norm": 0.7337765097618103, "learning_rate": 2.8185328185328186e-05, "loss": 0.7472, "step": 292 }, { "epoch": 0.0565746283066229, "grad_norm": 0.7165339589118958, "learning_rate": 2.8281853281853282e-05, "loss": 0.7961, "step": 293 }, { "epoch": 0.05676771577524618, "grad_norm": 3.9082493782043457, "learning_rate": 2.8378378378378378e-05, "loss": 0.8637, "step": 294 }, { "epoch": 0.05696080324386947, "grad_norm": 0.956221342086792, "learning_rate": 2.8474903474903475e-05, "loss": 0.741, "step": 295 }, { "epoch": 0.05715389071249276, "grad_norm": 0.7590420842170715, "learning_rate": 2.857142857142857e-05, "loss": 0.7668, "step": 296 }, { "epoch": 0.057346978181116046, "grad_norm": 0.7284662127494812, "learning_rate": 2.8667953667953667e-05, "loss": 0.7226, "step": 297 }, { "epoch": 0.05754006564973933, "grad_norm": 0.6827973127365112, "learning_rate": 2.8764478764478763e-05, "loss": 0.7817, "step": 298 }, { "epoch": 0.05773315311836262, "grad_norm": 0.800449013710022, "learning_rate": 2.886100386100386e-05, "loss": 0.719, "step": 299 }, { "epoch": 0.0579262405869859, "grad_norm": 0.6916401982307434, "learning_rate": 2.8957528957528956e-05, "loss": 0.8012, "step": 300 }, { "epoch": 0.05811932805560919, "grad_norm": 1.1123872995376587, "learning_rate": 2.9054054054054052e-05, "loss": 0.7408, "step": 301 }, { "epoch": 0.058312415524232476, "grad_norm": 0.625295102596283, "learning_rate": 2.915057915057915e-05, "loss": 0.8097, "step": 302 }, { "epoch": 0.058505502992855764, "grad_norm": 0.8107043504714966, "learning_rate": 2.9247104247104252e-05, "loss": 0.7482, "step": 303 }, { "epoch": 0.05869859046147905, "grad_norm": 0.7700033783912659, "learning_rate": 2.9343629343629348e-05, "loss": 0.8145, "step": 304 }, { "epoch": 0.05889167793010234, "grad_norm": 0.6443881988525391, "learning_rate": 2.9440154440154444e-05, "loss": 0.7757, "step": 305 }, { "epoch": 0.05908476539872562, "grad_norm": 0.6954162120819092, "learning_rate": 2.953667953667954e-05, "loss": 0.7366, "step": 306 }, { "epoch": 0.05927785286734891, "grad_norm": 0.6980789303779602, "learning_rate": 2.9633204633204637e-05, "loss": 0.7431, "step": 307 }, { "epoch": 0.059470940335972194, "grad_norm": 2.9025230407714844, "learning_rate": 2.9729729729729733e-05, "loss": 0.7871, "step": 308 }, { "epoch": 0.05966402780459548, "grad_norm": 0.9477601051330566, "learning_rate": 2.982625482625483e-05, "loss": 0.725, "step": 309 }, { "epoch": 0.05985711527321877, "grad_norm": 0.7140200734138489, "learning_rate": 2.9922779922779926e-05, "loss": 0.7625, "step": 310 }, { "epoch": 0.06005020274184206, "grad_norm": 0.7639788389205933, "learning_rate": 3.0019305019305022e-05, "loss": 0.7679, "step": 311 }, { "epoch": 0.06024329021046534, "grad_norm": 0.9293612837791443, "learning_rate": 3.011583011583012e-05, "loss": 0.732, "step": 312 }, { "epoch": 0.060436377679088625, "grad_norm": 0.8023565411567688, "learning_rate": 3.0212355212355215e-05, "loss": 0.7487, "step": 313 }, { "epoch": 0.06062946514771191, "grad_norm": 0.7616268396377563, "learning_rate": 3.030888030888031e-05, "loss": 0.8078, "step": 314 }, { "epoch": 0.0608225526163352, "grad_norm": 0.7697499394416809, "learning_rate": 3.0405405405405407e-05, "loss": 0.7237, "step": 315 }, { "epoch": 0.06101564008495849, "grad_norm": 0.6460227966308594, "learning_rate": 3.0501930501930504e-05, "loss": 0.8877, "step": 316 }, { "epoch": 0.061208727553581775, "grad_norm": 0.6933693289756775, "learning_rate": 3.05984555984556e-05, "loss": 0.8265, "step": 317 }, { "epoch": 0.061401815022205056, "grad_norm": 0.7685636281967163, "learning_rate": 3.0694980694980696e-05, "loss": 0.8095, "step": 318 }, { "epoch": 0.06159490249082834, "grad_norm": 2.1854889392852783, "learning_rate": 3.0791505791505796e-05, "loss": 0.7545, "step": 319 }, { "epoch": 0.06178798995945163, "grad_norm": 0.8264992833137512, "learning_rate": 3.088803088803089e-05, "loss": 0.7864, "step": 320 }, { "epoch": 0.06198107742807492, "grad_norm": 1.2340149879455566, "learning_rate": 3.098455598455599e-05, "loss": 0.7508, "step": 321 }, { "epoch": 0.062174164896698206, "grad_norm": 1.4597169160842896, "learning_rate": 3.108108108108108e-05, "loss": 0.6927, "step": 322 }, { "epoch": 0.06236725236532149, "grad_norm": 0.7395432591438293, "learning_rate": 3.117760617760618e-05, "loss": 0.7621, "step": 323 }, { "epoch": 0.06256033983394478, "grad_norm": 0.7577383518218994, "learning_rate": 3.1274131274131274e-05, "loss": 0.726, "step": 324 }, { "epoch": 0.06275342730256807, "grad_norm": 0.6495018601417542, "learning_rate": 3.1370656370656374e-05, "loss": 0.708, "step": 325 }, { "epoch": 0.06294651477119136, "grad_norm": 0.7194137573242188, "learning_rate": 3.1467181467181466e-05, "loss": 0.7327, "step": 326 }, { "epoch": 0.06313960223981463, "grad_norm": 0.692668616771698, "learning_rate": 3.1563706563706566e-05, "loss": 0.7457, "step": 327 }, { "epoch": 0.06333268970843792, "grad_norm": 1.124739646911621, "learning_rate": 3.166023166023166e-05, "loss": 0.7525, "step": 328 }, { "epoch": 0.0635257771770612, "grad_norm": 0.5831942558288574, "learning_rate": 3.175675675675676e-05, "loss": 0.8027, "step": 329 }, { "epoch": 0.06371886464568449, "grad_norm": 1.189865231513977, "learning_rate": 3.185328185328185e-05, "loss": 0.7255, "step": 330 }, { "epoch": 0.06391195211430778, "grad_norm": 1.0192172527313232, "learning_rate": 3.194980694980695e-05, "loss": 0.7223, "step": 331 }, { "epoch": 0.06410503958293107, "grad_norm": 0.6218483448028564, "learning_rate": 3.204633204633205e-05, "loss": 0.7226, "step": 332 }, { "epoch": 0.06429812705155435, "grad_norm": 0.6673777103424072, "learning_rate": 3.2142857142857144e-05, "loss": 0.7991, "step": 333 }, { "epoch": 0.06449121452017764, "grad_norm": 0.5821337699890137, "learning_rate": 3.2239382239382244e-05, "loss": 0.7579, "step": 334 }, { "epoch": 0.06468430198880093, "grad_norm": 1.4223250150680542, "learning_rate": 3.2335907335907337e-05, "loss": 0.7586, "step": 335 }, { "epoch": 0.06487738945742422, "grad_norm": 0.8064385056495667, "learning_rate": 3.2432432432432436e-05, "loss": 0.8019, "step": 336 }, { "epoch": 0.0650704769260475, "grad_norm": 1.1344062089920044, "learning_rate": 3.252895752895753e-05, "loss": 0.7667, "step": 337 }, { "epoch": 0.06526356439467079, "grad_norm": 0.8954115509986877, "learning_rate": 3.262548262548263e-05, "loss": 0.716, "step": 338 }, { "epoch": 0.06545665186329407, "grad_norm": 0.6899580359458923, "learning_rate": 3.272200772200772e-05, "loss": 0.8275, "step": 339 }, { "epoch": 0.06564973933191735, "grad_norm": 0.7266262173652649, "learning_rate": 3.281853281853282e-05, "loss": 0.7706, "step": 340 }, { "epoch": 0.06584282680054064, "grad_norm": 0.7876018285751343, "learning_rate": 3.2915057915057914e-05, "loss": 0.7661, "step": 341 }, { "epoch": 0.06603591426916393, "grad_norm": 0.7357893586158752, "learning_rate": 3.3011583011583014e-05, "loss": 0.7664, "step": 342 }, { "epoch": 0.06622900173778722, "grad_norm": 1.0206198692321777, "learning_rate": 3.310810810810811e-05, "loss": 0.7655, "step": 343 }, { "epoch": 0.0664220892064105, "grad_norm": 0.8032231330871582, "learning_rate": 3.3204633204633207e-05, "loss": 0.8012, "step": 344 }, { "epoch": 0.06661517667503379, "grad_norm": 0.6950976848602295, "learning_rate": 3.33011583011583e-05, "loss": 0.7462, "step": 345 }, { "epoch": 0.06680826414365708, "grad_norm": 0.6307368874549866, "learning_rate": 3.33976833976834e-05, "loss": 0.7056, "step": 346 }, { "epoch": 0.06700135161228037, "grad_norm": 0.7969343662261963, "learning_rate": 3.34942084942085e-05, "loss": 0.6847, "step": 347 }, { "epoch": 0.06719443908090365, "grad_norm": 0.7483661770820618, "learning_rate": 3.359073359073359e-05, "loss": 0.7393, "step": 348 }, { "epoch": 0.06738752654952694, "grad_norm": 1.5181164741516113, "learning_rate": 3.368725868725869e-05, "loss": 0.8318, "step": 349 }, { "epoch": 0.06758061401815023, "grad_norm": 0.7736886739730835, "learning_rate": 3.3783783783783784e-05, "loss": 0.7189, "step": 350 }, { "epoch": 0.0677737014867735, "grad_norm": 0.9670917987823486, "learning_rate": 3.3880308880308884e-05, "loss": 0.8119, "step": 351 }, { "epoch": 0.06796678895539679, "grad_norm": 0.9172984957695007, "learning_rate": 3.397683397683398e-05, "loss": 0.7327, "step": 352 }, { "epoch": 0.06815987642402008, "grad_norm": 0.5462597012519836, "learning_rate": 3.4073359073359077e-05, "loss": 0.691, "step": 353 }, { "epoch": 0.06835296389264336, "grad_norm": 1.1080247163772583, "learning_rate": 3.416988416988417e-05, "loss": 0.7557, "step": 354 }, { "epoch": 0.06854605136126665, "grad_norm": 0.566271185874939, "learning_rate": 3.426640926640927e-05, "loss": 0.7488, "step": 355 }, { "epoch": 0.06873913882988994, "grad_norm": 0.7389577627182007, "learning_rate": 3.436293436293436e-05, "loss": 0.7061, "step": 356 }, { "epoch": 0.06893222629851323, "grad_norm": 0.5654365420341492, "learning_rate": 3.445945945945946e-05, "loss": 0.816, "step": 357 }, { "epoch": 0.06912531376713651, "grad_norm": 0.8060508370399475, "learning_rate": 3.4555984555984555e-05, "loss": 0.7072, "step": 358 }, { "epoch": 0.0693184012357598, "grad_norm": 1.1870709657669067, "learning_rate": 3.4652509652509654e-05, "loss": 0.7201, "step": 359 }, { "epoch": 0.06951148870438309, "grad_norm": 0.8069962859153748, "learning_rate": 3.4749034749034754e-05, "loss": 0.6817, "step": 360 }, { "epoch": 0.06970457617300638, "grad_norm": 0.5921075344085693, "learning_rate": 3.484555984555985e-05, "loss": 0.7759, "step": 361 }, { "epoch": 0.06989766364162966, "grad_norm": 0.5424115657806396, "learning_rate": 3.4942084942084947e-05, "loss": 0.7832, "step": 362 }, { "epoch": 0.07009075111025294, "grad_norm": 0.7442657351493835, "learning_rate": 3.503861003861004e-05, "loss": 0.7897, "step": 363 }, { "epoch": 0.07028383857887623, "grad_norm": 0.6358199715614319, "learning_rate": 3.513513513513514e-05, "loss": 0.7337, "step": 364 }, { "epoch": 0.07047692604749951, "grad_norm": 0.697826087474823, "learning_rate": 3.523166023166023e-05, "loss": 0.7212, "step": 365 }, { "epoch": 0.0706700135161228, "grad_norm": 0.7534604668617249, "learning_rate": 3.532818532818533e-05, "loss": 0.7607, "step": 366 }, { "epoch": 0.07086310098474609, "grad_norm": 2.3232758045196533, "learning_rate": 3.5424710424710425e-05, "loss": 0.7153, "step": 367 }, { "epoch": 0.07105618845336938, "grad_norm": 0.5292892456054688, "learning_rate": 3.5521235521235524e-05, "loss": 0.7114, "step": 368 }, { "epoch": 0.07124927592199266, "grad_norm": 0.6919161081314087, "learning_rate": 3.561776061776062e-05, "loss": 0.7824, "step": 369 }, { "epoch": 0.07144236339061595, "grad_norm": 0.6590285897254944, "learning_rate": 3.571428571428572e-05, "loss": 0.7089, "step": 370 }, { "epoch": 0.07163545085923924, "grad_norm": 0.9062819480895996, "learning_rate": 3.581081081081081e-05, "loss": 0.7461, "step": 371 }, { "epoch": 0.07182853832786253, "grad_norm": 0.6124919056892395, "learning_rate": 3.590733590733591e-05, "loss": 0.7608, "step": 372 }, { "epoch": 0.07202162579648581, "grad_norm": 1.1038275957107544, "learning_rate": 3.6003861003861e-05, "loss": 0.7017, "step": 373 }, { "epoch": 0.0722147132651091, "grad_norm": 0.6069640517234802, "learning_rate": 3.61003861003861e-05, "loss": 0.7674, "step": 374 }, { "epoch": 0.07240780073373237, "grad_norm": 0.6402168273925781, "learning_rate": 3.61969111969112e-05, "loss": 0.7461, "step": 375 }, { "epoch": 0.07260088820235566, "grad_norm": 0.713346004486084, "learning_rate": 3.6293436293436295e-05, "loss": 0.7076, "step": 376 }, { "epoch": 0.07279397567097895, "grad_norm": 0.967926561832428, "learning_rate": 3.6389961389961394e-05, "loss": 0.7461, "step": 377 }, { "epoch": 0.07298706313960224, "grad_norm": 0.6534168720245361, "learning_rate": 3.648648648648649e-05, "loss": 0.7941, "step": 378 }, { "epoch": 0.07318015060822552, "grad_norm": 0.7813475728034973, "learning_rate": 3.658301158301159e-05, "loss": 0.7773, "step": 379 }, { "epoch": 0.07337323807684881, "grad_norm": 0.8636813163757324, "learning_rate": 3.667953667953668e-05, "loss": 0.7052, "step": 380 }, { "epoch": 0.0735663255454721, "grad_norm": 0.6508225798606873, "learning_rate": 3.677606177606178e-05, "loss": 0.7386, "step": 381 }, { "epoch": 0.07375941301409539, "grad_norm": 1.1163290739059448, "learning_rate": 3.687258687258687e-05, "loss": 0.77, "step": 382 }, { "epoch": 0.07395250048271867, "grad_norm": 0.5024898052215576, "learning_rate": 3.696911196911197e-05, "loss": 0.7478, "step": 383 }, { "epoch": 0.07414558795134196, "grad_norm": 0.7462846040725708, "learning_rate": 3.7065637065637065e-05, "loss": 0.6959, "step": 384 }, { "epoch": 0.07433867541996525, "grad_norm": 0.6693225502967834, "learning_rate": 3.7162162162162165e-05, "loss": 0.6887, "step": 385 }, { "epoch": 0.07453176288858854, "grad_norm": 0.6639240384101868, "learning_rate": 3.725868725868726e-05, "loss": 0.7768, "step": 386 }, { "epoch": 0.07472485035721181, "grad_norm": 0.9782634973526001, "learning_rate": 3.735521235521236e-05, "loss": 0.7427, "step": 387 }, { "epoch": 0.0749179378258351, "grad_norm": 1.626426100730896, "learning_rate": 3.745173745173745e-05, "loss": 0.7981, "step": 388 }, { "epoch": 0.07511102529445839, "grad_norm": 0.6610535383224487, "learning_rate": 3.754826254826255e-05, "loss": 0.6924, "step": 389 }, { "epoch": 0.07530411276308167, "grad_norm": 0.67239910364151, "learning_rate": 3.764478764478765e-05, "loss": 0.7416, "step": 390 }, { "epoch": 0.07549720023170496, "grad_norm": 0.7310374975204468, "learning_rate": 3.774131274131274e-05, "loss": 0.8173, "step": 391 }, { "epoch": 0.07569028770032825, "grad_norm": 0.8096733689308167, "learning_rate": 3.783783783783784e-05, "loss": 0.6877, "step": 392 }, { "epoch": 0.07588337516895154, "grad_norm": 0.6070708632469177, "learning_rate": 3.7934362934362935e-05, "loss": 0.7476, "step": 393 }, { "epoch": 0.07607646263757482, "grad_norm": 0.5712679624557495, "learning_rate": 3.8030888030888035e-05, "loss": 0.7527, "step": 394 }, { "epoch": 0.07626955010619811, "grad_norm": 0.8777661919593811, "learning_rate": 3.812741312741313e-05, "loss": 0.76, "step": 395 }, { "epoch": 0.0764626375748214, "grad_norm": 0.7096277475357056, "learning_rate": 3.822393822393823e-05, "loss": 0.7206, "step": 396 }, { "epoch": 0.07665572504344469, "grad_norm": 0.8942784070968628, "learning_rate": 3.832046332046332e-05, "loss": 0.7313, "step": 397 }, { "epoch": 0.07684881251206796, "grad_norm": 0.7163386344909668, "learning_rate": 3.841698841698842e-05, "loss": 0.799, "step": 398 }, { "epoch": 0.07704189998069125, "grad_norm": 0.6007486581802368, "learning_rate": 3.851351351351351e-05, "loss": 0.7662, "step": 399 }, { "epoch": 0.07723498744931453, "grad_norm": 0.7042856812477112, "learning_rate": 3.861003861003861e-05, "loss": 0.6991, "step": 400 }, { "epoch": 0.07742807491793782, "grad_norm": 1.4363938570022583, "learning_rate": 3.8706563706563705e-05, "loss": 0.8261, "step": 401 }, { "epoch": 0.07762116238656111, "grad_norm": 0.6663137078285217, "learning_rate": 3.8803088803088805e-05, "loss": 0.6564, "step": 402 }, { "epoch": 0.0778142498551844, "grad_norm": 0.6466824412345886, "learning_rate": 3.8899613899613905e-05, "loss": 0.7855, "step": 403 }, { "epoch": 0.07800733732380769, "grad_norm": 1.3641741275787354, "learning_rate": 3.8996138996139e-05, "loss": 0.6818, "step": 404 }, { "epoch": 0.07820042479243097, "grad_norm": 0.6272053718566895, "learning_rate": 3.90926640926641e-05, "loss": 0.7485, "step": 405 }, { "epoch": 0.07839351226105426, "grad_norm": 1.7906036376953125, "learning_rate": 3.918918918918919e-05, "loss": 0.8225, "step": 406 }, { "epoch": 0.07858659972967755, "grad_norm": 0.6979548931121826, "learning_rate": 3.928571428571429e-05, "loss": 0.7567, "step": 407 }, { "epoch": 0.07877968719830084, "grad_norm": 0.6851924657821655, "learning_rate": 3.938223938223938e-05, "loss": 0.7687, "step": 408 }, { "epoch": 0.07897277466692412, "grad_norm": 0.724315881729126, "learning_rate": 3.947876447876448e-05, "loss": 0.7785, "step": 409 }, { "epoch": 0.0791658621355474, "grad_norm": 0.7056208848953247, "learning_rate": 3.9575289575289576e-05, "loss": 0.7311, "step": 410 }, { "epoch": 0.07935894960417068, "grad_norm": 0.9213816523551941, "learning_rate": 3.9671814671814675e-05, "loss": 0.7832, "step": 411 }, { "epoch": 0.07955203707279397, "grad_norm": 0.6168084740638733, "learning_rate": 3.976833976833977e-05, "loss": 0.6867, "step": 412 }, { "epoch": 0.07974512454141726, "grad_norm": 0.636667013168335, "learning_rate": 3.986486486486487e-05, "loss": 0.6742, "step": 413 }, { "epoch": 0.07993821201004055, "grad_norm": 0.6224633455276489, "learning_rate": 3.996138996138996e-05, "loss": 0.7713, "step": 414 }, { "epoch": 0.08013129947866383, "grad_norm": 0.8147987127304077, "learning_rate": 4.005791505791506e-05, "loss": 0.7529, "step": 415 }, { "epoch": 0.08032438694728712, "grad_norm": 0.9840044379234314, "learning_rate": 4.015444015444015e-05, "loss": 0.7492, "step": 416 }, { "epoch": 0.08051747441591041, "grad_norm": 6.793372631072998, "learning_rate": 4.025096525096525e-05, "loss": 0.7298, "step": 417 }, { "epoch": 0.0807105618845337, "grad_norm": 0.6910251379013062, "learning_rate": 4.034749034749035e-05, "loss": 0.8029, "step": 418 }, { "epoch": 0.08090364935315698, "grad_norm": 0.6901378035545349, "learning_rate": 4.0444015444015446e-05, "loss": 0.6982, "step": 419 }, { "epoch": 0.08109673682178027, "grad_norm": 0.6735275983810425, "learning_rate": 4.0540540540540545e-05, "loss": 0.6971, "step": 420 }, { "epoch": 0.08128982429040356, "grad_norm": 1.4116977453231812, "learning_rate": 4.063706563706564e-05, "loss": 0.7048, "step": 421 }, { "epoch": 0.08148291175902683, "grad_norm": 0.7064179182052612, "learning_rate": 4.073359073359074e-05, "loss": 0.7913, "step": 422 }, { "epoch": 0.08167599922765012, "grad_norm": 0.6614903211593628, "learning_rate": 4.083011583011583e-05, "loss": 0.7572, "step": 423 }, { "epoch": 0.08186908669627341, "grad_norm": 0.687410295009613, "learning_rate": 4.092664092664093e-05, "loss": 0.8074, "step": 424 }, { "epoch": 0.0820621741648967, "grad_norm": 0.7592277526855469, "learning_rate": 4.102316602316602e-05, "loss": 0.7687, "step": 425 }, { "epoch": 0.08225526163351998, "grad_norm": 0.7253331542015076, "learning_rate": 4.111969111969112e-05, "loss": 0.7033, "step": 426 }, { "epoch": 0.08244834910214327, "grad_norm": 0.7125637531280518, "learning_rate": 4.1216216216216216e-05, "loss": 0.82, "step": 427 }, { "epoch": 0.08264143657076656, "grad_norm": 0.5537644624710083, "learning_rate": 4.1312741312741316e-05, "loss": 0.6858, "step": 428 }, { "epoch": 0.08283452403938985, "grad_norm": 0.964499294757843, "learning_rate": 4.140926640926641e-05, "loss": 0.7217, "step": 429 }, { "epoch": 0.08302761150801313, "grad_norm": 0.669491708278656, "learning_rate": 4.150579150579151e-05, "loss": 0.7516, "step": 430 }, { "epoch": 0.08322069897663642, "grad_norm": 0.6488585472106934, "learning_rate": 4.16023166023166e-05, "loss": 0.7338, "step": 431 }, { "epoch": 0.08341378644525971, "grad_norm": 0.6749062538146973, "learning_rate": 4.16988416988417e-05, "loss": 0.6925, "step": 432 }, { "epoch": 0.083606873913883, "grad_norm": 0.5656501650810242, "learning_rate": 4.17953667953668e-05, "loss": 0.6796, "step": 433 }, { "epoch": 0.08379996138250627, "grad_norm": 0.5810490846633911, "learning_rate": 4.189189189189189e-05, "loss": 0.7912, "step": 434 }, { "epoch": 0.08399304885112956, "grad_norm": 0.7931817173957825, "learning_rate": 4.198841698841699e-05, "loss": 0.7038, "step": 435 }, { "epoch": 0.08418613631975284, "grad_norm": 0.5406643152236938, "learning_rate": 4.2084942084942086e-05, "loss": 0.7203, "step": 436 }, { "epoch": 0.08437922378837613, "grad_norm": 0.6381701827049255, "learning_rate": 4.2181467181467186e-05, "loss": 0.7013, "step": 437 }, { "epoch": 0.08457231125699942, "grad_norm": 1.3012274503707886, "learning_rate": 4.227799227799228e-05, "loss": 0.7432, "step": 438 }, { "epoch": 0.0847653987256227, "grad_norm": 0.8085581660270691, "learning_rate": 4.237451737451738e-05, "loss": 0.6915, "step": 439 }, { "epoch": 0.084958486194246, "grad_norm": 0.6611363887786865, "learning_rate": 4.247104247104247e-05, "loss": 0.7485, "step": 440 }, { "epoch": 0.08515157366286928, "grad_norm": 0.7491820454597473, "learning_rate": 4.256756756756757e-05, "loss": 0.7029, "step": 441 }, { "epoch": 0.08534466113149257, "grad_norm": 0.5617631077766418, "learning_rate": 4.2664092664092664e-05, "loss": 0.7964, "step": 442 }, { "epoch": 0.08553774860011586, "grad_norm": 0.6043375730514526, "learning_rate": 4.276061776061776e-05, "loss": 0.7112, "step": 443 }, { "epoch": 0.08573083606873914, "grad_norm": 0.6073841452598572, "learning_rate": 4.2857142857142856e-05, "loss": 0.6982, "step": 444 }, { "epoch": 0.08592392353736243, "grad_norm": 1.4205626249313354, "learning_rate": 4.2953667953667956e-05, "loss": 0.6608, "step": 445 }, { "epoch": 0.0861170110059857, "grad_norm": 0.952065110206604, "learning_rate": 4.305019305019305e-05, "loss": 0.7506, "step": 446 }, { "epoch": 0.08631009847460899, "grad_norm": 0.7289600372314453, "learning_rate": 4.314671814671815e-05, "loss": 0.7611, "step": 447 }, { "epoch": 0.08650318594323228, "grad_norm": 0.5558276176452637, "learning_rate": 4.324324324324325e-05, "loss": 0.7715, "step": 448 }, { "epoch": 0.08669627341185557, "grad_norm": 1.7579823732376099, "learning_rate": 4.333976833976834e-05, "loss": 0.7702, "step": 449 }, { "epoch": 0.08688936088047886, "grad_norm": 0.8220595717430115, "learning_rate": 4.343629343629344e-05, "loss": 0.7702, "step": 450 }, { "epoch": 0.08708244834910214, "grad_norm": 0.616638720035553, "learning_rate": 4.3532818532818534e-05, "loss": 0.7663, "step": 451 }, { "epoch": 0.08727553581772543, "grad_norm": 0.6619438529014587, "learning_rate": 4.3629343629343633e-05, "loss": 0.7701, "step": 452 }, { "epoch": 0.08746862328634872, "grad_norm": 0.6234711408615112, "learning_rate": 4.3725868725868726e-05, "loss": 0.7937, "step": 453 }, { "epoch": 0.087661710754972, "grad_norm": 0.7878490686416626, "learning_rate": 4.3822393822393826e-05, "loss": 0.7102, "step": 454 }, { "epoch": 0.08785479822359529, "grad_norm": 0.8998988270759583, "learning_rate": 4.391891891891892e-05, "loss": 0.7716, "step": 455 }, { "epoch": 0.08804788569221858, "grad_norm": 0.6566177606582642, "learning_rate": 4.401544401544402e-05, "loss": 0.726, "step": 456 }, { "epoch": 0.08824097316084187, "grad_norm": 0.7310289740562439, "learning_rate": 4.411196911196911e-05, "loss": 0.7926, "step": 457 }, { "epoch": 0.08843406062946514, "grad_norm": 0.7258588671684265, "learning_rate": 4.420849420849421e-05, "loss": 0.7784, "step": 458 }, { "epoch": 0.08862714809808843, "grad_norm": 0.9863048791885376, "learning_rate": 4.4305019305019304e-05, "loss": 0.6893, "step": 459 }, { "epoch": 0.08882023556671172, "grad_norm": 0.5832343101501465, "learning_rate": 4.4401544401544404e-05, "loss": 0.7211, "step": 460 }, { "epoch": 0.089013323035335, "grad_norm": 0.7880433201789856, "learning_rate": 4.4498069498069503e-05, "loss": 0.7042, "step": 461 }, { "epoch": 0.08920641050395829, "grad_norm": 1.7603062391281128, "learning_rate": 4.4594594594594596e-05, "loss": 0.7256, "step": 462 }, { "epoch": 0.08939949797258158, "grad_norm": 1.394879698753357, "learning_rate": 4.4691119691119696e-05, "loss": 0.6941, "step": 463 }, { "epoch": 0.08959258544120487, "grad_norm": 1.176344871520996, "learning_rate": 4.478764478764479e-05, "loss": 0.7185, "step": 464 }, { "epoch": 0.08978567290982815, "grad_norm": 0.7222850918769836, "learning_rate": 4.488416988416989e-05, "loss": 0.7998, "step": 465 }, { "epoch": 0.08997876037845144, "grad_norm": 0.7621069550514221, "learning_rate": 4.498069498069498e-05, "loss": 0.7314, "step": 466 }, { "epoch": 0.09017184784707473, "grad_norm": 0.6082245111465454, "learning_rate": 4.507722007722008e-05, "loss": 0.7545, "step": 467 }, { "epoch": 0.09036493531569802, "grad_norm": 0.6272000074386597, "learning_rate": 4.5173745173745174e-05, "loss": 0.7482, "step": 468 }, { "epoch": 0.0905580227843213, "grad_norm": 1.2615721225738525, "learning_rate": 4.5270270270270274e-05, "loss": 0.7757, "step": 469 }, { "epoch": 0.09075111025294458, "grad_norm": 0.6288130283355713, "learning_rate": 4.536679536679537e-05, "loss": 0.7207, "step": 470 }, { "epoch": 0.09094419772156787, "grad_norm": 0.7336625456809998, "learning_rate": 4.5463320463320466e-05, "loss": 0.7044, "step": 471 }, { "epoch": 0.09113728519019115, "grad_norm": 0.6930909156799316, "learning_rate": 4.555984555984556e-05, "loss": 0.6976, "step": 472 }, { "epoch": 0.09133037265881444, "grad_norm": 1.4351006746292114, "learning_rate": 4.565637065637066e-05, "loss": 0.733, "step": 473 }, { "epoch": 0.09152346012743773, "grad_norm": 0.8012370467185974, "learning_rate": 4.575289575289575e-05, "loss": 0.7345, "step": 474 }, { "epoch": 0.09171654759606102, "grad_norm": 0.7723062038421631, "learning_rate": 4.584942084942085e-05, "loss": 0.8131, "step": 475 }, { "epoch": 0.0919096350646843, "grad_norm": 0.8961136937141418, "learning_rate": 4.594594594594595e-05, "loss": 0.7214, "step": 476 }, { "epoch": 0.09210272253330759, "grad_norm": 0.660580039024353, "learning_rate": 4.6042471042471044e-05, "loss": 0.6792, "step": 477 }, { "epoch": 0.09229581000193088, "grad_norm": 0.752389132976532, "learning_rate": 4.6138996138996144e-05, "loss": 0.7659, "step": 478 }, { "epoch": 0.09248889747055417, "grad_norm": 1.3502782583236694, "learning_rate": 4.623552123552124e-05, "loss": 0.7066, "step": 479 }, { "epoch": 0.09268198493917745, "grad_norm": 0.7188951969146729, "learning_rate": 4.6332046332046336e-05, "loss": 0.6936, "step": 480 }, { "epoch": 0.09287507240780074, "grad_norm": 0.6943327188491821, "learning_rate": 4.642857142857143e-05, "loss": 0.7721, "step": 481 }, { "epoch": 0.09306815987642401, "grad_norm": 0.5691218376159668, "learning_rate": 4.652509652509653e-05, "loss": 0.6708, "step": 482 }, { "epoch": 0.0932612473450473, "grad_norm": 0.6988184452056885, "learning_rate": 4.662162162162162e-05, "loss": 0.7051, "step": 483 }, { "epoch": 0.09345433481367059, "grad_norm": 0.7841290235519409, "learning_rate": 4.671814671814672e-05, "loss": 0.6585, "step": 484 }, { "epoch": 0.09364742228229388, "grad_norm": 0.7821927666664124, "learning_rate": 4.6814671814671815e-05, "loss": 0.7699, "step": 485 }, { "epoch": 0.09384050975091716, "grad_norm": 0.8412482142448425, "learning_rate": 4.6911196911196914e-05, "loss": 0.7432, "step": 486 }, { "epoch": 0.09403359721954045, "grad_norm": 0.6264936923980713, "learning_rate": 4.700772200772201e-05, "loss": 0.7796, "step": 487 }, { "epoch": 0.09422668468816374, "grad_norm": 0.8634293079376221, "learning_rate": 4.710424710424711e-05, "loss": 0.7875, "step": 488 }, { "epoch": 0.09441977215678703, "grad_norm": 0.7115240097045898, "learning_rate": 4.72007722007722e-05, "loss": 0.8005, "step": 489 }, { "epoch": 0.09461285962541031, "grad_norm": 0.6407897472381592, "learning_rate": 4.72972972972973e-05, "loss": 0.7268, "step": 490 }, { "epoch": 0.0948059470940336, "grad_norm": 0.7202383279800415, "learning_rate": 4.73938223938224e-05, "loss": 0.7199, "step": 491 }, { "epoch": 0.09499903456265689, "grad_norm": 1.2685513496398926, "learning_rate": 4.749034749034749e-05, "loss": 0.7235, "step": 492 }, { "epoch": 0.09519212203128016, "grad_norm": 0.7112973928451538, "learning_rate": 4.758687258687259e-05, "loss": 0.7584, "step": 493 }, { "epoch": 0.09538520949990345, "grad_norm": 0.664370059967041, "learning_rate": 4.7683397683397685e-05, "loss": 0.7172, "step": 494 }, { "epoch": 0.09557829696852674, "grad_norm": 2.2488884925842285, "learning_rate": 4.7779922779922784e-05, "loss": 0.7864, "step": 495 }, { "epoch": 0.09577138443715003, "grad_norm": 1.1994589567184448, "learning_rate": 4.787644787644788e-05, "loss": 0.7044, "step": 496 }, { "epoch": 0.09596447190577331, "grad_norm": 0.7808882594108582, "learning_rate": 4.797297297297298e-05, "loss": 0.7109, "step": 497 }, { "epoch": 0.0961575593743966, "grad_norm": 0.6748218536376953, "learning_rate": 4.806949806949807e-05, "loss": 0.7395, "step": 498 }, { "epoch": 0.09635064684301989, "grad_norm": 0.957978367805481, "learning_rate": 4.816602316602317e-05, "loss": 0.7048, "step": 499 }, { "epoch": 0.09654373431164318, "grad_norm": 0.5292345285415649, "learning_rate": 4.826254826254826e-05, "loss": 0.755, "step": 500 }, { "epoch": 0.09654373431164318, "eval_loss": 0.7778130769729614, "eval_runtime": 64.8965, "eval_samples_per_second": 10.232, "eval_steps_per_second": 0.324, "step": 500 }, { "epoch": 0.09673682178026646, "grad_norm": 0.616638720035553, "learning_rate": 4.835907335907336e-05, "loss": 0.7505, "step": 501 }, { "epoch": 0.09692990924888975, "grad_norm": 1.4260598421096802, "learning_rate": 4.8455598455598455e-05, "loss": 0.7571, "step": 502 }, { "epoch": 0.09712299671751304, "grad_norm": 0.8748161196708679, "learning_rate": 4.8552123552123555e-05, "loss": 0.6679, "step": 503 }, { "epoch": 0.09731608418613633, "grad_norm": 0.6353579163551331, "learning_rate": 4.8648648648648654e-05, "loss": 0.7521, "step": 504 }, { "epoch": 0.0975091716547596, "grad_norm": 0.7293747067451477, "learning_rate": 4.874517374517375e-05, "loss": 0.7279, "step": 505 }, { "epoch": 0.09770225912338289, "grad_norm": 0.9698399305343628, "learning_rate": 4.884169884169885e-05, "loss": 0.6834, "step": 506 }, { "epoch": 0.09789534659200617, "grad_norm": 0.5874331593513489, "learning_rate": 4.893822393822394e-05, "loss": 0.7521, "step": 507 }, { "epoch": 0.09808843406062946, "grad_norm": 0.7244163751602173, "learning_rate": 4.903474903474904e-05, "loss": 0.7471, "step": 508 }, { "epoch": 0.09828152152925275, "grad_norm": 0.6234381198883057, "learning_rate": 4.913127413127413e-05, "loss": 0.7143, "step": 509 }, { "epoch": 0.09847460899787604, "grad_norm": 2.385683536529541, "learning_rate": 4.922779922779923e-05, "loss": 0.6871, "step": 510 }, { "epoch": 0.09866769646649932, "grad_norm": 0.6156534552574158, "learning_rate": 4.9324324324324325e-05, "loss": 0.7275, "step": 511 }, { "epoch": 0.09886078393512261, "grad_norm": 0.6653795838356018, "learning_rate": 4.9420849420849425e-05, "loss": 0.6764, "step": 512 }, { "epoch": 0.0990538714037459, "grad_norm": 0.8032006621360779, "learning_rate": 4.951737451737452e-05, "loss": 0.7179, "step": 513 }, { "epoch": 0.09924695887236919, "grad_norm": 0.860110342502594, "learning_rate": 4.961389961389962e-05, "loss": 0.7193, "step": 514 }, { "epoch": 0.09944004634099247, "grad_norm": 0.813652753829956, "learning_rate": 4.971042471042471e-05, "loss": 0.6801, "step": 515 }, { "epoch": 0.09963313380961576, "grad_norm": 0.6797869801521301, "learning_rate": 4.980694980694981e-05, "loss": 0.6662, "step": 516 }, { "epoch": 0.09982622127823904, "grad_norm": 0.7177279591560364, "learning_rate": 4.99034749034749e-05, "loss": 0.7051, "step": 517 }, { "epoch": 0.10001930874686232, "grad_norm": 0.7668677568435669, "learning_rate": 5e-05, "loss": 0.792, "step": 518 }, { "epoch": 0.10021239621548561, "grad_norm": 0.668115496635437, "learning_rate": 5.00965250965251e-05, "loss": 0.7298, "step": 519 }, { "epoch": 0.1004054836841089, "grad_norm": 0.933905303478241, "learning_rate": 5.0193050193050195e-05, "loss": 0.7425, "step": 520 }, { "epoch": 0.10059857115273219, "grad_norm": 0.8206327557563782, "learning_rate": 5.0289575289575295e-05, "loss": 0.7312, "step": 521 }, { "epoch": 0.10079165862135547, "grad_norm": 0.6196517944335938, "learning_rate": 5.038610038610039e-05, "loss": 0.7581, "step": 522 }, { "epoch": 0.10098474608997876, "grad_norm": 0.6996350884437561, "learning_rate": 5.048262548262549e-05, "loss": 0.7029, "step": 523 }, { "epoch": 0.10117783355860205, "grad_norm": 1.0287654399871826, "learning_rate": 5.057915057915058e-05, "loss": 0.7599, "step": 524 }, { "epoch": 0.10137092102722534, "grad_norm": 0.6404280066490173, "learning_rate": 5.067567567567568e-05, "loss": 0.7312, "step": 525 }, { "epoch": 0.10156400849584862, "grad_norm": 0.5515587329864502, "learning_rate": 5.077220077220077e-05, "loss": 0.6982, "step": 526 }, { "epoch": 0.10175709596447191, "grad_norm": 0.7178388833999634, "learning_rate": 5.086872586872587e-05, "loss": 0.76, "step": 527 }, { "epoch": 0.1019501834330952, "grad_norm": 0.5733186602592468, "learning_rate": 5.0965250965250965e-05, "loss": 0.7788, "step": 528 }, { "epoch": 0.10214327090171847, "grad_norm": 0.6690077781677246, "learning_rate": 5.1061776061776065e-05, "loss": 0.7188, "step": 529 }, { "epoch": 0.10233635837034176, "grad_norm": 0.5850878953933716, "learning_rate": 5.115830115830116e-05, "loss": 0.6954, "step": 530 }, { "epoch": 0.10252944583896505, "grad_norm": 0.7557463645935059, "learning_rate": 5.125482625482626e-05, "loss": 0.7909, "step": 531 }, { "epoch": 0.10272253330758833, "grad_norm": 0.8406816720962524, "learning_rate": 5.135135135135135e-05, "loss": 0.7123, "step": 532 }, { "epoch": 0.10291562077621162, "grad_norm": 0.72758549451828, "learning_rate": 5.144787644787645e-05, "loss": 0.648, "step": 533 }, { "epoch": 0.10310870824483491, "grad_norm": 0.6355212926864624, "learning_rate": 5.154440154440154e-05, "loss": 0.7269, "step": 534 }, { "epoch": 0.1033017957134582, "grad_norm": 0.6472181081771851, "learning_rate": 5.164092664092664e-05, "loss": 0.74, "step": 535 }, { "epoch": 0.10349488318208148, "grad_norm": 0.769492506980896, "learning_rate": 5.1737451737451736e-05, "loss": 0.7036, "step": 536 }, { "epoch": 0.10368797065070477, "grad_norm": 0.7774934768676758, "learning_rate": 5.1833976833976835e-05, "loss": 0.7505, "step": 537 }, { "epoch": 0.10388105811932806, "grad_norm": 0.6572110652923584, "learning_rate": 5.193050193050193e-05, "loss": 0.733, "step": 538 }, { "epoch": 0.10407414558795135, "grad_norm": 0.735820472240448, "learning_rate": 5.202702702702703e-05, "loss": 0.7795, "step": 539 }, { "epoch": 0.10426723305657463, "grad_norm": 0.5570487976074219, "learning_rate": 5.212355212355212e-05, "loss": 0.718, "step": 540 }, { "epoch": 0.10446032052519791, "grad_norm": 0.5864914655685425, "learning_rate": 5.222007722007722e-05, "loss": 0.7556, "step": 541 }, { "epoch": 0.1046534079938212, "grad_norm": 0.7393521070480347, "learning_rate": 5.2316602316602313e-05, "loss": 0.7096, "step": 542 }, { "epoch": 0.10484649546244448, "grad_norm": 0.6799572110176086, "learning_rate": 5.241312741312741e-05, "loss": 0.748, "step": 543 }, { "epoch": 0.10503958293106777, "grad_norm": 0.7467478513717651, "learning_rate": 5.2509652509652506e-05, "loss": 0.7052, "step": 544 }, { "epoch": 0.10523267039969106, "grad_norm": 1.021146535873413, "learning_rate": 5.2606177606177606e-05, "loss": 0.658, "step": 545 }, { "epoch": 0.10542575786831435, "grad_norm": 0.6903501749038696, "learning_rate": 5.27027027027027e-05, "loss": 0.7877, "step": 546 }, { "epoch": 0.10561884533693763, "grad_norm": 0.7726394534111023, "learning_rate": 5.2799227799227805e-05, "loss": 0.7378, "step": 547 }, { "epoch": 0.10581193280556092, "grad_norm": 0.5436424612998962, "learning_rate": 5.2895752895752905e-05, "loss": 0.633, "step": 548 }, { "epoch": 0.10600502027418421, "grad_norm": 0.8809859752655029, "learning_rate": 5.2992277992278e-05, "loss": 0.7395, "step": 549 }, { "epoch": 0.1061981077428075, "grad_norm": 0.9103888273239136, "learning_rate": 5.30888030888031e-05, "loss": 0.7941, "step": 550 }, { "epoch": 0.10639119521143078, "grad_norm": 1.058484435081482, "learning_rate": 5.318532818532819e-05, "loss": 0.6961, "step": 551 }, { "epoch": 0.10658428268005407, "grad_norm": 0.7312431335449219, "learning_rate": 5.328185328185329e-05, "loss": 0.6695, "step": 552 }, { "epoch": 0.10677737014867734, "grad_norm": 0.6096143126487732, "learning_rate": 5.337837837837838e-05, "loss": 0.7376, "step": 553 }, { "epoch": 0.10697045761730063, "grad_norm": 0.9612897038459778, "learning_rate": 5.347490347490348e-05, "loss": 0.8168, "step": 554 }, { "epoch": 0.10716354508592392, "grad_norm": 1.331941843032837, "learning_rate": 5.3571428571428575e-05, "loss": 0.7209, "step": 555 }, { "epoch": 0.10735663255454721, "grad_norm": 0.6878387928009033, "learning_rate": 5.3667953667953675e-05, "loss": 0.6572, "step": 556 }, { "epoch": 0.1075497200231705, "grad_norm": 0.5558281540870667, "learning_rate": 5.376447876447877e-05, "loss": 0.7414, "step": 557 }, { "epoch": 0.10774280749179378, "grad_norm": 0.6560817360877991, "learning_rate": 5.386100386100387e-05, "loss": 0.7388, "step": 558 }, { "epoch": 0.10793589496041707, "grad_norm": 1.000905990600586, "learning_rate": 5.395752895752896e-05, "loss": 0.6864, "step": 559 }, { "epoch": 0.10812898242904036, "grad_norm": 0.7224311828613281, "learning_rate": 5.405405405405406e-05, "loss": 0.7565, "step": 560 }, { "epoch": 0.10832206989766364, "grad_norm": 0.6668733954429626, "learning_rate": 5.415057915057915e-05, "loss": 0.71, "step": 561 }, { "epoch": 0.10851515736628693, "grad_norm": 1.4102400541305542, "learning_rate": 5.424710424710425e-05, "loss": 0.6629, "step": 562 }, { "epoch": 0.10870824483491022, "grad_norm": 0.9939964413642883, "learning_rate": 5.4343629343629346e-05, "loss": 0.7304, "step": 563 }, { "epoch": 0.10890133230353351, "grad_norm": 1.2649308443069458, "learning_rate": 5.4440154440154445e-05, "loss": 0.7846, "step": 564 }, { "epoch": 0.10909441977215678, "grad_norm": 0.6297494173049927, "learning_rate": 5.453667953667954e-05, "loss": 0.7156, "step": 565 }, { "epoch": 0.10928750724078007, "grad_norm": 0.48908060789108276, "learning_rate": 5.463320463320464e-05, "loss": 0.6762, "step": 566 }, { "epoch": 0.10948059470940336, "grad_norm": 0.634666919708252, "learning_rate": 5.472972972972973e-05, "loss": 0.6954, "step": 567 }, { "epoch": 0.10967368217802664, "grad_norm": 0.7561770081520081, "learning_rate": 5.482625482625483e-05, "loss": 0.7585, "step": 568 }, { "epoch": 0.10986676964664993, "grad_norm": 0.5875459909439087, "learning_rate": 5.4922779922779924e-05, "loss": 0.6599, "step": 569 }, { "epoch": 0.11005985711527322, "grad_norm": 0.6881880164146423, "learning_rate": 5.501930501930502e-05, "loss": 0.7676, "step": 570 }, { "epoch": 0.1102529445838965, "grad_norm": 0.6751196980476379, "learning_rate": 5.5115830115830116e-05, "loss": 0.7114, "step": 571 }, { "epoch": 0.1104460320525198, "grad_norm": 0.6956008672714233, "learning_rate": 5.5212355212355216e-05, "loss": 0.7436, "step": 572 }, { "epoch": 0.11063911952114308, "grad_norm": 0.5721766352653503, "learning_rate": 5.530888030888031e-05, "loss": 0.6733, "step": 573 }, { "epoch": 0.11083220698976637, "grad_norm": 0.6493635177612305, "learning_rate": 5.540540540540541e-05, "loss": 0.7784, "step": 574 }, { "epoch": 0.11102529445838966, "grad_norm": 0.7796343564987183, "learning_rate": 5.55019305019305e-05, "loss": 0.7529, "step": 575 }, { "epoch": 0.11121838192701294, "grad_norm": 0.7348147034645081, "learning_rate": 5.55984555984556e-05, "loss": 0.7189, "step": 576 }, { "epoch": 0.11141146939563622, "grad_norm": 0.9751469492912292, "learning_rate": 5.56949806949807e-05, "loss": 0.7975, "step": 577 }, { "epoch": 0.1116045568642595, "grad_norm": 0.5967510342597961, "learning_rate": 5.5791505791505794e-05, "loss": 0.7314, "step": 578 }, { "epoch": 0.11179764433288279, "grad_norm": 1.0135568380355835, "learning_rate": 5.588803088803089e-05, "loss": 0.749, "step": 579 }, { "epoch": 0.11199073180150608, "grad_norm": 0.6515663266181946, "learning_rate": 5.5984555984555986e-05, "loss": 0.7383, "step": 580 }, { "epoch": 0.11218381927012937, "grad_norm": 4.125094413757324, "learning_rate": 5.6081081081081086e-05, "loss": 0.6945, "step": 581 }, { "epoch": 0.11237690673875265, "grad_norm": 0.5544619560241699, "learning_rate": 5.617760617760618e-05, "loss": 0.6508, "step": 582 }, { "epoch": 0.11256999420737594, "grad_norm": 0.8979211449623108, "learning_rate": 5.627413127413128e-05, "loss": 0.7391, "step": 583 }, { "epoch": 0.11276308167599923, "grad_norm": 0.6974206566810608, "learning_rate": 5.637065637065637e-05, "loss": 0.6633, "step": 584 }, { "epoch": 0.11295616914462252, "grad_norm": 0.9532239437103271, "learning_rate": 5.646718146718147e-05, "loss": 0.7062, "step": 585 }, { "epoch": 0.1131492566132458, "grad_norm": 0.5879608392715454, "learning_rate": 5.6563706563706564e-05, "loss": 0.7029, "step": 586 }, { "epoch": 0.11334234408186909, "grad_norm": 0.7971058487892151, "learning_rate": 5.6660231660231664e-05, "loss": 0.7656, "step": 587 }, { "epoch": 0.11353543155049237, "grad_norm": 1.184209942817688, "learning_rate": 5.6756756756756757e-05, "loss": 0.7222, "step": 588 }, { "epoch": 0.11372851901911565, "grad_norm": 0.8970884680747986, "learning_rate": 5.6853281853281856e-05, "loss": 0.7457, "step": 589 }, { "epoch": 0.11392160648773894, "grad_norm": 0.5574601888656616, "learning_rate": 5.694980694980695e-05, "loss": 0.7227, "step": 590 }, { "epoch": 0.11411469395636223, "grad_norm": 0.5283297300338745, "learning_rate": 5.704633204633205e-05, "loss": 0.7998, "step": 591 }, { "epoch": 0.11430778142498552, "grad_norm": 0.7167169451713562, "learning_rate": 5.714285714285714e-05, "loss": 0.6709, "step": 592 }, { "epoch": 0.1145008688936088, "grad_norm": 0.7873841524124146, "learning_rate": 5.723938223938224e-05, "loss": 0.7488, "step": 593 }, { "epoch": 0.11469395636223209, "grad_norm": 0.6315003037452698, "learning_rate": 5.7335907335907334e-05, "loss": 0.6846, "step": 594 }, { "epoch": 0.11488704383085538, "grad_norm": 0.5312097072601318, "learning_rate": 5.7432432432432434e-05, "loss": 0.7144, "step": 595 }, { "epoch": 0.11508013129947867, "grad_norm": 0.6428000330924988, "learning_rate": 5.752895752895753e-05, "loss": 0.7593, "step": 596 }, { "epoch": 0.11527321876810195, "grad_norm": 0.6256437301635742, "learning_rate": 5.7625482625482627e-05, "loss": 0.7055, "step": 597 }, { "epoch": 0.11546630623672524, "grad_norm": 0.6071249842643738, "learning_rate": 5.772200772200772e-05, "loss": 0.6924, "step": 598 }, { "epoch": 0.11565939370534853, "grad_norm": 0.6659126877784729, "learning_rate": 5.781853281853282e-05, "loss": 0.7793, "step": 599 }, { "epoch": 0.1158524811739718, "grad_norm": 0.9770428538322449, "learning_rate": 5.791505791505791e-05, "loss": 0.7503, "step": 600 }, { "epoch": 0.11604556864259509, "grad_norm": 0.5661723613739014, "learning_rate": 5.801158301158301e-05, "loss": 0.8029, "step": 601 }, { "epoch": 0.11623865611121838, "grad_norm": 0.6420595645904541, "learning_rate": 5.8108108108108105e-05, "loss": 0.7419, "step": 602 }, { "epoch": 0.11643174357984167, "grad_norm": 0.548065721988678, "learning_rate": 5.8204633204633204e-05, "loss": 0.6434, "step": 603 }, { "epoch": 0.11662483104846495, "grad_norm": 0.5621503591537476, "learning_rate": 5.83011583011583e-05, "loss": 0.6662, "step": 604 }, { "epoch": 0.11681791851708824, "grad_norm": 0.8948400616645813, "learning_rate": 5.8397683397683404e-05, "loss": 0.693, "step": 605 }, { "epoch": 0.11701100598571153, "grad_norm": 1.3903534412384033, "learning_rate": 5.8494208494208503e-05, "loss": 0.7683, "step": 606 }, { "epoch": 0.11720409345433482, "grad_norm": 0.5374670624732971, "learning_rate": 5.8590733590733596e-05, "loss": 0.7245, "step": 607 }, { "epoch": 0.1173971809229581, "grad_norm": 0.711693525314331, "learning_rate": 5.8687258687258696e-05, "loss": 0.7879, "step": 608 }, { "epoch": 0.11759026839158139, "grad_norm": 0.7820830941200256, "learning_rate": 5.878378378378379e-05, "loss": 0.7161, "step": 609 }, { "epoch": 0.11778335586020468, "grad_norm": 0.9591979384422302, "learning_rate": 5.888030888030889e-05, "loss": 0.7069, "step": 610 }, { "epoch": 0.11797644332882797, "grad_norm": 0.5058055520057678, "learning_rate": 5.897683397683398e-05, "loss": 0.7625, "step": 611 }, { "epoch": 0.11816953079745124, "grad_norm": 0.5258230566978455, "learning_rate": 5.907335907335908e-05, "loss": 0.7696, "step": 612 }, { "epoch": 0.11836261826607453, "grad_norm": 1.3049041032791138, "learning_rate": 5.9169884169884174e-05, "loss": 0.7053, "step": 613 }, { "epoch": 0.11855570573469781, "grad_norm": 3.5200490951538086, "learning_rate": 5.9266409266409274e-05, "loss": 0.7681, "step": 614 }, { "epoch": 0.1187487932033211, "grad_norm": 0.6677502989768982, "learning_rate": 5.936293436293437e-05, "loss": 0.8048, "step": 615 }, { "epoch": 0.11894188067194439, "grad_norm": 0.5838826894760132, "learning_rate": 5.9459459459459466e-05, "loss": 0.6803, "step": 616 }, { "epoch": 0.11913496814056768, "grad_norm": 0.5415284037590027, "learning_rate": 5.955598455598456e-05, "loss": 0.7463, "step": 617 }, { "epoch": 0.11932805560919096, "grad_norm": 0.6508578658103943, "learning_rate": 5.965250965250966e-05, "loss": 0.7484, "step": 618 }, { "epoch": 0.11952114307781425, "grad_norm": 0.6960828304290771, "learning_rate": 5.974903474903475e-05, "loss": 0.7714, "step": 619 }, { "epoch": 0.11971423054643754, "grad_norm": 0.6818757653236389, "learning_rate": 5.984555984555985e-05, "loss": 0.6883, "step": 620 }, { "epoch": 0.11990731801506083, "grad_norm": 2.174037218093872, "learning_rate": 5.9942084942084944e-05, "loss": 0.6881, "step": 621 }, { "epoch": 0.12010040548368411, "grad_norm": 0.8490345478057861, "learning_rate": 6.0038610038610044e-05, "loss": 0.6823, "step": 622 }, { "epoch": 0.1202934929523074, "grad_norm": 0.8328946828842163, "learning_rate": 6.013513513513514e-05, "loss": 0.6778, "step": 623 }, { "epoch": 0.12048658042093068, "grad_norm": 0.6721829771995544, "learning_rate": 6.023166023166024e-05, "loss": 0.7388, "step": 624 }, { "epoch": 0.12067966788955396, "grad_norm": 1.2082606554031372, "learning_rate": 6.032818532818533e-05, "loss": 0.7349, "step": 625 }, { "epoch": 0.12087275535817725, "grad_norm": 1.1507623195648193, "learning_rate": 6.042471042471043e-05, "loss": 0.7347, "step": 626 }, { "epoch": 0.12106584282680054, "grad_norm": 0.6768080592155457, "learning_rate": 6.052123552123552e-05, "loss": 0.7589, "step": 627 }, { "epoch": 0.12125893029542383, "grad_norm": 0.7767075896263123, "learning_rate": 6.061776061776062e-05, "loss": 0.7395, "step": 628 }, { "epoch": 0.12145201776404711, "grad_norm": 0.6093001365661621, "learning_rate": 6.0714285714285715e-05, "loss": 0.6645, "step": 629 }, { "epoch": 0.1216451052326704, "grad_norm": 1.098445177078247, "learning_rate": 6.0810810810810814e-05, "loss": 0.6948, "step": 630 }, { "epoch": 0.12183819270129369, "grad_norm": 0.5803216099739075, "learning_rate": 6.090733590733591e-05, "loss": 0.6841, "step": 631 }, { "epoch": 0.12203128016991698, "grad_norm": 1.0690213441848755, "learning_rate": 6.100386100386101e-05, "loss": 0.7141, "step": 632 }, { "epoch": 0.12222436763854026, "grad_norm": 0.8306679725646973, "learning_rate": 6.11003861003861e-05, "loss": 0.6713, "step": 633 }, { "epoch": 0.12241745510716355, "grad_norm": 0.8393546342849731, "learning_rate": 6.11969111969112e-05, "loss": 0.7437, "step": 634 }, { "epoch": 0.12261054257578684, "grad_norm": 1.415779948234558, "learning_rate": 6.12934362934363e-05, "loss": 0.7454, "step": 635 }, { "epoch": 0.12280363004441011, "grad_norm": 1.2453417778015137, "learning_rate": 6.138996138996139e-05, "loss": 0.7103, "step": 636 }, { "epoch": 0.1229967175130334, "grad_norm": 0.5550177693367004, "learning_rate": 6.14864864864865e-05, "loss": 0.745, "step": 637 }, { "epoch": 0.12318980498165669, "grad_norm": 1.1073613166809082, "learning_rate": 6.158301158301159e-05, "loss": 0.7488, "step": 638 }, { "epoch": 0.12338289245027997, "grad_norm": 0.9294238090515137, "learning_rate": 6.167953667953668e-05, "loss": 0.7581, "step": 639 }, { "epoch": 0.12357597991890326, "grad_norm": 0.7889374494552612, "learning_rate": 6.177606177606178e-05, "loss": 0.7415, "step": 640 }, { "epoch": 0.12376906738752655, "grad_norm": 0.810200572013855, "learning_rate": 6.187258687258688e-05, "loss": 0.7091, "step": 641 }, { "epoch": 0.12396215485614984, "grad_norm": 0.9371594786643982, "learning_rate": 6.196911196911198e-05, "loss": 0.7288, "step": 642 }, { "epoch": 0.12415524232477312, "grad_norm": 0.5766445398330688, "learning_rate": 6.206563706563707e-05, "loss": 0.7543, "step": 643 }, { "epoch": 0.12434832979339641, "grad_norm": 0.5837346315383911, "learning_rate": 6.216216216216216e-05, "loss": 0.7618, "step": 644 }, { "epoch": 0.1245414172620197, "grad_norm": 0.7801163792610168, "learning_rate": 6.225868725868727e-05, "loss": 0.7493, "step": 645 }, { "epoch": 0.12473450473064299, "grad_norm": 0.6228458285331726, "learning_rate": 6.235521235521236e-05, "loss": 0.6852, "step": 646 }, { "epoch": 0.12492759219926627, "grad_norm": 0.6683769822120667, "learning_rate": 6.245173745173745e-05, "loss": 0.7104, "step": 647 }, { "epoch": 0.12512067966788956, "grad_norm": 0.8570566773414612, "learning_rate": 6.254826254826255e-05, "loss": 0.6951, "step": 648 }, { "epoch": 0.12531376713651285, "grad_norm": 1.4235841035842896, "learning_rate": 6.264478764478765e-05, "loss": 0.7254, "step": 649 }, { "epoch": 0.12550685460513614, "grad_norm": 0.7936073541641235, "learning_rate": 6.274131274131275e-05, "loss": 0.7102, "step": 650 }, { "epoch": 0.12569994207375942, "grad_norm": 0.605908215045929, "learning_rate": 6.283783783783784e-05, "loss": 0.7856, "step": 651 }, { "epoch": 0.1258930295423827, "grad_norm": 0.7783729434013367, "learning_rate": 6.293436293436293e-05, "loss": 0.7611, "step": 652 }, { "epoch": 0.126086117011006, "grad_norm": 0.76744145154953, "learning_rate": 6.303088803088804e-05, "loss": 0.7185, "step": 653 }, { "epoch": 0.12627920447962926, "grad_norm": 0.719352126121521, "learning_rate": 6.312741312741313e-05, "loss": 0.6583, "step": 654 }, { "epoch": 0.12647229194825255, "grad_norm": 0.6601213812828064, "learning_rate": 6.322393822393823e-05, "loss": 0.6652, "step": 655 }, { "epoch": 0.12666537941687583, "grad_norm": 0.7865424156188965, "learning_rate": 6.332046332046332e-05, "loss": 0.7408, "step": 656 }, { "epoch": 0.12685846688549912, "grad_norm": 0.9604238867759705, "learning_rate": 6.341698841698842e-05, "loss": 0.7127, "step": 657 }, { "epoch": 0.1270515543541224, "grad_norm": 0.5854148268699646, "learning_rate": 6.351351351351352e-05, "loss": 0.7187, "step": 658 }, { "epoch": 0.1272446418227457, "grad_norm": 0.5655543208122253, "learning_rate": 6.361003861003861e-05, "loss": 0.7426, "step": 659 }, { "epoch": 0.12743772929136898, "grad_norm": 1.188993215560913, "learning_rate": 6.37065637065637e-05, "loss": 0.7386, "step": 660 }, { "epoch": 0.12763081675999227, "grad_norm": 0.5151922702789307, "learning_rate": 6.380308880308881e-05, "loss": 0.7541, "step": 661 }, { "epoch": 0.12782390422861556, "grad_norm": 0.9861944317817688, "learning_rate": 6.38996138996139e-05, "loss": 0.7373, "step": 662 }, { "epoch": 0.12801699169723885, "grad_norm": 0.9239577651023865, "learning_rate": 6.3996138996139e-05, "loss": 0.7026, "step": 663 }, { "epoch": 0.12821007916586213, "grad_norm": 0.6829615831375122, "learning_rate": 6.40926640926641e-05, "loss": 0.7747, "step": 664 }, { "epoch": 0.12840316663448542, "grad_norm": 0.7728827595710754, "learning_rate": 6.41891891891892e-05, "loss": 0.6912, "step": 665 }, { "epoch": 0.1285962541031087, "grad_norm": 0.6098606586456299, "learning_rate": 6.428571428571429e-05, "loss": 0.7497, "step": 666 }, { "epoch": 0.128789341571732, "grad_norm": 0.5770236849784851, "learning_rate": 6.438223938223938e-05, "loss": 0.6926, "step": 667 }, { "epoch": 0.12898242904035528, "grad_norm": 0.641086757183075, "learning_rate": 6.447876447876449e-05, "loss": 0.7422, "step": 668 }, { "epoch": 0.12917551650897857, "grad_norm": 2.395469903945923, "learning_rate": 6.457528957528958e-05, "loss": 0.6783, "step": 669 }, { "epoch": 0.12936860397760186, "grad_norm": 0.5837751030921936, "learning_rate": 6.467181467181467e-05, "loss": 0.6887, "step": 670 }, { "epoch": 0.12956169144622515, "grad_norm": 0.5658463835716248, "learning_rate": 6.476833976833977e-05, "loss": 0.7107, "step": 671 }, { "epoch": 0.12975477891484843, "grad_norm": 0.66454017162323, "learning_rate": 6.486486486486487e-05, "loss": 0.6847, "step": 672 }, { "epoch": 0.12994786638347172, "grad_norm": 0.8728488683700562, "learning_rate": 6.496138996138997e-05, "loss": 0.7223, "step": 673 }, { "epoch": 0.130140953852095, "grad_norm": 0.6007034778594971, "learning_rate": 6.505791505791506e-05, "loss": 0.7403, "step": 674 }, { "epoch": 0.1303340413207183, "grad_norm": 0.6436805725097656, "learning_rate": 6.515444015444015e-05, "loss": 0.7166, "step": 675 }, { "epoch": 0.13052712878934158, "grad_norm": 0.7396709322929382, "learning_rate": 6.525096525096526e-05, "loss": 0.7602, "step": 676 }, { "epoch": 0.13072021625796487, "grad_norm": 2.030311346054077, "learning_rate": 6.534749034749035e-05, "loss": 0.7639, "step": 677 }, { "epoch": 0.13091330372658813, "grad_norm": 0.8943464756011963, "learning_rate": 6.544401544401544e-05, "loss": 0.7274, "step": 678 }, { "epoch": 0.13110639119521142, "grad_norm": 0.6852810978889465, "learning_rate": 6.554054054054054e-05, "loss": 0.7438, "step": 679 }, { "epoch": 0.1312994786638347, "grad_norm": 0.8565996885299683, "learning_rate": 6.563706563706564e-05, "loss": 0.7104, "step": 680 }, { "epoch": 0.131492566132458, "grad_norm": 0.6860914826393127, "learning_rate": 6.573359073359074e-05, "loss": 0.6806, "step": 681 }, { "epoch": 0.13168565360108128, "grad_norm": 0.8085033297538757, "learning_rate": 6.583011583011583e-05, "loss": 0.6658, "step": 682 }, { "epoch": 0.13187874106970457, "grad_norm": 1.0588310956954956, "learning_rate": 6.592664092664092e-05, "loss": 0.6775, "step": 683 }, { "epoch": 0.13207182853832786, "grad_norm": 0.5968077182769775, "learning_rate": 6.602316602316603e-05, "loss": 0.7191, "step": 684 }, { "epoch": 0.13226491600695114, "grad_norm": 1.0356923341751099, "learning_rate": 6.611969111969112e-05, "loss": 0.7148, "step": 685 }, { "epoch": 0.13245800347557443, "grad_norm": 0.697124183177948, "learning_rate": 6.621621621621621e-05, "loss": 0.7265, "step": 686 }, { "epoch": 0.13265109094419772, "grad_norm": 0.5500757694244385, "learning_rate": 6.63127413127413e-05, "loss": 0.74, "step": 687 }, { "epoch": 0.132844178412821, "grad_norm": 0.9178681969642639, "learning_rate": 6.640926640926641e-05, "loss": 0.7491, "step": 688 }, { "epoch": 0.1330372658814443, "grad_norm": 0.9064716696739197, "learning_rate": 6.65057915057915e-05, "loss": 0.7255, "step": 689 }, { "epoch": 0.13323035335006758, "grad_norm": 1.16093111038208, "learning_rate": 6.66023166023166e-05, "loss": 0.7049, "step": 690 }, { "epoch": 0.13342344081869087, "grad_norm": 0.5885705351829529, "learning_rate": 6.66988416988417e-05, "loss": 0.6851, "step": 691 }, { "epoch": 0.13361652828731416, "grad_norm": 0.8831232786178589, "learning_rate": 6.67953667953668e-05, "loss": 0.7824, "step": 692 }, { "epoch": 0.13380961575593744, "grad_norm": 0.7695460319519043, "learning_rate": 6.68918918918919e-05, "loss": 0.7264, "step": 693 }, { "epoch": 0.13400270322456073, "grad_norm": 0.9788355827331543, "learning_rate": 6.6988416988417e-05, "loss": 0.7905, "step": 694 }, { "epoch": 0.13419579069318402, "grad_norm": 0.898735523223877, "learning_rate": 6.708494208494209e-05, "loss": 0.7917, "step": 695 }, { "epoch": 0.1343888781618073, "grad_norm": 0.9674891233444214, "learning_rate": 6.718146718146718e-05, "loss": 0.7009, "step": 696 }, { "epoch": 0.1345819656304306, "grad_norm": 0.6533107161521912, "learning_rate": 6.727799227799229e-05, "loss": 0.7181, "step": 697 }, { "epoch": 0.13477505309905388, "grad_norm": 0.8304387927055359, "learning_rate": 6.737451737451738e-05, "loss": 0.6462, "step": 698 }, { "epoch": 0.13496814056767717, "grad_norm": 2.0932180881500244, "learning_rate": 6.747104247104248e-05, "loss": 0.6404, "step": 699 }, { "epoch": 0.13516122803630046, "grad_norm": 0.7506155967712402, "learning_rate": 6.756756756756757e-05, "loss": 0.7562, "step": 700 }, { "epoch": 0.13535431550492372, "grad_norm": 0.8594201803207397, "learning_rate": 6.766409266409268e-05, "loss": 0.7872, "step": 701 }, { "epoch": 0.135547402973547, "grad_norm": 0.7295433282852173, "learning_rate": 6.776061776061777e-05, "loss": 0.7223, "step": 702 }, { "epoch": 0.1357404904421703, "grad_norm": 0.6222133040428162, "learning_rate": 6.785714285714286e-05, "loss": 0.6922, "step": 703 }, { "epoch": 0.13593357791079358, "grad_norm": 0.5396873354911804, "learning_rate": 6.795366795366795e-05, "loss": 0.7939, "step": 704 }, { "epoch": 0.13612666537941687, "grad_norm": 0.6117557287216187, "learning_rate": 6.805019305019306e-05, "loss": 0.7048, "step": 705 }, { "epoch": 0.13631975284804015, "grad_norm": 0.7107462882995605, "learning_rate": 6.814671814671815e-05, "loss": 0.6893, "step": 706 }, { "epoch": 0.13651284031666344, "grad_norm": 0.676642656326294, "learning_rate": 6.824324324324325e-05, "loss": 0.7514, "step": 707 }, { "epoch": 0.13670592778528673, "grad_norm": 0.6481294631958008, "learning_rate": 6.833976833976834e-05, "loss": 0.6861, "step": 708 }, { "epoch": 0.13689901525391002, "grad_norm": 1.5239580869674683, "learning_rate": 6.843629343629345e-05, "loss": 0.7636, "step": 709 }, { "epoch": 0.1370921027225333, "grad_norm": 1.0965473651885986, "learning_rate": 6.853281853281854e-05, "loss": 0.7225, "step": 710 }, { "epoch": 0.1372851901911566, "grad_norm": 0.6850960850715637, "learning_rate": 6.862934362934363e-05, "loss": 0.7092, "step": 711 }, { "epoch": 0.13747827765977988, "grad_norm": 0.4854830205440521, "learning_rate": 6.872586872586872e-05, "loss": 0.7424, "step": 712 }, { "epoch": 0.13767136512840317, "grad_norm": 0.654615581035614, "learning_rate": 6.882239382239383e-05, "loss": 0.7853, "step": 713 }, { "epoch": 0.13786445259702645, "grad_norm": 1.2588396072387695, "learning_rate": 6.891891891891892e-05, "loss": 0.6964, "step": 714 }, { "epoch": 0.13805754006564974, "grad_norm": 1.720842957496643, "learning_rate": 6.901544401544402e-05, "loss": 0.773, "step": 715 }, { "epoch": 0.13825062753427303, "grad_norm": 1.0452533960342407, "learning_rate": 6.911196911196911e-05, "loss": 0.7229, "step": 716 }, { "epoch": 0.13844371500289632, "grad_norm": 0.6208298206329346, "learning_rate": 6.920849420849422e-05, "loss": 0.7162, "step": 717 }, { "epoch": 0.1386368024715196, "grad_norm": 0.8013814687728882, "learning_rate": 6.930501930501931e-05, "loss": 0.6107, "step": 718 }, { "epoch": 0.1388298899401429, "grad_norm": 1.2108834981918335, "learning_rate": 6.94015444015444e-05, "loss": 0.705, "step": 719 }, { "epoch": 0.13902297740876618, "grad_norm": 1.1930105686187744, "learning_rate": 6.949806949806951e-05, "loss": 0.69, "step": 720 }, { "epoch": 0.13921606487738947, "grad_norm": 1.6441415548324585, "learning_rate": 6.95945945945946e-05, "loss": 0.7082, "step": 721 }, { "epoch": 0.13940915234601275, "grad_norm": 0.698668897151947, "learning_rate": 6.96911196911197e-05, "loss": 0.6895, "step": 722 }, { "epoch": 0.13960223981463604, "grad_norm": 0.6534443497657776, "learning_rate": 6.978764478764479e-05, "loss": 0.7436, "step": 723 }, { "epoch": 0.13979532728325933, "grad_norm": 0.615665078163147, "learning_rate": 6.988416988416989e-05, "loss": 0.7055, "step": 724 }, { "epoch": 0.1399884147518826, "grad_norm": 0.7732800245285034, "learning_rate": 6.998069498069499e-05, "loss": 0.6842, "step": 725 }, { "epoch": 0.14018150222050588, "grad_norm": 1.0091147422790527, "learning_rate": 7.007722007722008e-05, "loss": 0.6651, "step": 726 }, { "epoch": 0.14037458968912916, "grad_norm": 0.8309493660926819, "learning_rate": 7.017374517374517e-05, "loss": 0.7441, "step": 727 }, { "epoch": 0.14056767715775245, "grad_norm": 0.8717305660247803, "learning_rate": 7.027027027027028e-05, "loss": 0.7078, "step": 728 }, { "epoch": 0.14076076462637574, "grad_norm": 0.6777476072311401, "learning_rate": 7.036679536679537e-05, "loss": 0.6526, "step": 729 }, { "epoch": 0.14095385209499903, "grad_norm": 1.2722349166870117, "learning_rate": 7.046332046332046e-05, "loss": 0.7723, "step": 730 }, { "epoch": 0.14114693956362231, "grad_norm": 2.48962664604187, "learning_rate": 7.055984555984556e-05, "loss": 0.7434, "step": 731 }, { "epoch": 0.1413400270322456, "grad_norm": 1.66167414188385, "learning_rate": 7.065637065637066e-05, "loss": 0.796, "step": 732 }, { "epoch": 0.1415331145008689, "grad_norm": 0.6339986324310303, "learning_rate": 7.075289575289576e-05, "loss": 0.7205, "step": 733 }, { "epoch": 0.14172620196949218, "grad_norm": 0.9277063608169556, "learning_rate": 7.084942084942085e-05, "loss": 0.7466, "step": 734 }, { "epoch": 0.14191928943811546, "grad_norm": 0.6971203088760376, "learning_rate": 7.094594594594594e-05, "loss": 0.7027, "step": 735 }, { "epoch": 0.14211237690673875, "grad_norm": 0.7771477699279785, "learning_rate": 7.104247104247105e-05, "loss": 0.7385, "step": 736 }, { "epoch": 0.14230546437536204, "grad_norm": 0.682913601398468, "learning_rate": 7.113899613899614e-05, "loss": 0.7414, "step": 737 }, { "epoch": 0.14249855184398533, "grad_norm": 5.828978061676025, "learning_rate": 7.123552123552123e-05, "loss": 0.6818, "step": 738 }, { "epoch": 0.14269163931260861, "grad_norm": 0.9037524461746216, "learning_rate": 7.133204633204633e-05, "loss": 0.7468, "step": 739 }, { "epoch": 0.1428847267812319, "grad_norm": 0.7160452604293823, "learning_rate": 7.142857142857143e-05, "loss": 0.7536, "step": 740 }, { "epoch": 0.1430778142498552, "grad_norm": 0.7971741557121277, "learning_rate": 7.152509652509653e-05, "loss": 0.6595, "step": 741 }, { "epoch": 0.14327090171847848, "grad_norm": 0.7362459301948547, "learning_rate": 7.162162162162162e-05, "loss": 0.6784, "step": 742 }, { "epoch": 0.14346398918710176, "grad_norm": 0.8386814594268799, "learning_rate": 7.171814671814671e-05, "loss": 0.7763, "step": 743 }, { "epoch": 0.14365707665572505, "grad_norm": 1.1444711685180664, "learning_rate": 7.181467181467182e-05, "loss": 0.7679, "step": 744 }, { "epoch": 0.14385016412434834, "grad_norm": 1.0610361099243164, "learning_rate": 7.191119691119691e-05, "loss": 0.7267, "step": 745 }, { "epoch": 0.14404325159297163, "grad_norm": 0.9501770734786987, "learning_rate": 7.2007722007722e-05, "loss": 0.6565, "step": 746 }, { "epoch": 0.14423633906159491, "grad_norm": 0.6948303580284119, "learning_rate": 7.21042471042471e-05, "loss": 0.7188, "step": 747 }, { "epoch": 0.1444294265302182, "grad_norm": 0.7849158048629761, "learning_rate": 7.22007722007722e-05, "loss": 0.6901, "step": 748 }, { "epoch": 0.14462251399884146, "grad_norm": 0.8796669244766235, "learning_rate": 7.229729729729731e-05, "loss": 0.712, "step": 749 }, { "epoch": 0.14481560146746475, "grad_norm": 0.7821425199508667, "learning_rate": 7.23938223938224e-05, "loss": 0.693, "step": 750 }, { "epoch": 0.14500868893608804, "grad_norm": 0.968515157699585, "learning_rate": 7.24903474903475e-05, "loss": 0.6858, "step": 751 }, { "epoch": 0.14520177640471132, "grad_norm": 0.9261429309844971, "learning_rate": 7.258687258687259e-05, "loss": 0.7752, "step": 752 }, { "epoch": 0.1453948638733346, "grad_norm": 3.003952741622925, "learning_rate": 7.26833976833977e-05, "loss": 0.742, "step": 753 }, { "epoch": 0.1455879513419579, "grad_norm": 0.8192519545555115, "learning_rate": 7.277992277992279e-05, "loss": 0.6592, "step": 754 }, { "epoch": 0.1457810388105812, "grad_norm": 1.312096357345581, "learning_rate": 7.287644787644788e-05, "loss": 0.7496, "step": 755 }, { "epoch": 0.14597412627920447, "grad_norm": 0.6488781571388245, "learning_rate": 7.297297297297297e-05, "loss": 0.6804, "step": 756 }, { "epoch": 0.14616721374782776, "grad_norm": 1.6273335218429565, "learning_rate": 7.306949806949808e-05, "loss": 0.6552, "step": 757 }, { "epoch": 0.14636030121645105, "grad_norm": 1.2327557802200317, "learning_rate": 7.316602316602317e-05, "loss": 0.7361, "step": 758 }, { "epoch": 0.14655338868507434, "grad_norm": 0.8222900629043579, "learning_rate": 7.326254826254827e-05, "loss": 0.7606, "step": 759 }, { "epoch": 0.14674647615369762, "grad_norm": 0.5982229709625244, "learning_rate": 7.335907335907336e-05, "loss": 0.7175, "step": 760 }, { "epoch": 0.1469395636223209, "grad_norm": 0.9498708844184875, "learning_rate": 7.345559845559847e-05, "loss": 0.7286, "step": 761 }, { "epoch": 0.1471326510909442, "grad_norm": 1.1568856239318848, "learning_rate": 7.355212355212356e-05, "loss": 0.635, "step": 762 }, { "epoch": 0.1473257385595675, "grad_norm": 1.2808427810668945, "learning_rate": 7.364864864864865e-05, "loss": 0.7313, "step": 763 }, { "epoch": 0.14751882602819077, "grad_norm": 0.8438487648963928, "learning_rate": 7.374517374517374e-05, "loss": 0.7471, "step": 764 }, { "epoch": 0.14771191349681406, "grad_norm": 0.9349913597106934, "learning_rate": 7.384169884169885e-05, "loss": 0.7187, "step": 765 }, { "epoch": 0.14790500096543735, "grad_norm": 1.399659276008606, "learning_rate": 7.393822393822394e-05, "loss": 0.746, "step": 766 }, { "epoch": 0.14809808843406064, "grad_norm": 0.7694143652915955, "learning_rate": 7.403474903474904e-05, "loss": 0.7058, "step": 767 }, { "epoch": 0.14829117590268392, "grad_norm": 0.7506746053695679, "learning_rate": 7.413127413127413e-05, "loss": 0.6676, "step": 768 }, { "epoch": 0.1484842633713072, "grad_norm": 0.9023545980453491, "learning_rate": 7.422779922779924e-05, "loss": 0.664, "step": 769 }, { "epoch": 0.1486773508399305, "grad_norm": 0.7709887623786926, "learning_rate": 7.432432432432433e-05, "loss": 0.7442, "step": 770 }, { "epoch": 0.1488704383085538, "grad_norm": 1.0995734930038452, "learning_rate": 7.442084942084942e-05, "loss": 0.7452, "step": 771 }, { "epoch": 0.14906352577717707, "grad_norm": 0.9865106344223022, "learning_rate": 7.451737451737452e-05, "loss": 0.7252, "step": 772 }, { "epoch": 0.14925661324580033, "grad_norm": 0.8823658227920532, "learning_rate": 7.461389961389962e-05, "loss": 0.7285, "step": 773 }, { "epoch": 0.14944970071442362, "grad_norm": 0.7966082096099854, "learning_rate": 7.471042471042471e-05, "loss": 0.6752, "step": 774 }, { "epoch": 0.1496427881830469, "grad_norm": 0.8793091773986816, "learning_rate": 7.480694980694981e-05, "loss": 0.7328, "step": 775 }, { "epoch": 0.1498358756516702, "grad_norm": 0.903807520866394, "learning_rate": 7.49034749034749e-05, "loss": 0.6709, "step": 776 }, { "epoch": 0.15002896312029348, "grad_norm": 1.0642708539962769, "learning_rate": 7.500000000000001e-05, "loss": 0.636, "step": 777 }, { "epoch": 0.15022205058891677, "grad_norm": 1.2188619375228882, "learning_rate": 7.50965250965251e-05, "loss": 0.7295, "step": 778 }, { "epoch": 0.15041513805754006, "grad_norm": 1.5018128156661987, "learning_rate": 7.519305019305019e-05, "loss": 0.7467, "step": 779 }, { "epoch": 0.15060822552616335, "grad_norm": 1.048971176147461, "learning_rate": 7.52895752895753e-05, "loss": 0.7614, "step": 780 }, { "epoch": 0.15080131299478663, "grad_norm": 1.1628470420837402, "learning_rate": 7.538610038610039e-05, "loss": 0.7321, "step": 781 }, { "epoch": 0.15099440046340992, "grad_norm": 1.2986804246902466, "learning_rate": 7.548262548262549e-05, "loss": 0.6686, "step": 782 }, { "epoch": 0.1511874879320332, "grad_norm": 2.1267917156219482, "learning_rate": 7.557915057915058e-05, "loss": 0.7388, "step": 783 }, { "epoch": 0.1513805754006565, "grad_norm": 1.4338945150375366, "learning_rate": 7.567567567567568e-05, "loss": 0.6959, "step": 784 }, { "epoch": 0.15157366286927978, "grad_norm": 1.5865932703018188, "learning_rate": 7.577220077220078e-05, "loss": 0.7256, "step": 785 }, { "epoch": 0.15176675033790307, "grad_norm": 1.4001312255859375, "learning_rate": 7.586872586872587e-05, "loss": 0.76, "step": 786 }, { "epoch": 0.15195983780652636, "grad_norm": 3.3699355125427246, "learning_rate": 7.596525096525096e-05, "loss": 0.7051, "step": 787 }, { "epoch": 0.15215292527514965, "grad_norm": 1.2554651498794556, "learning_rate": 7.606177606177607e-05, "loss": 0.6533, "step": 788 }, { "epoch": 0.15234601274377294, "grad_norm": 5.961514472961426, "learning_rate": 7.615830115830116e-05, "loss": 0.7036, "step": 789 }, { "epoch": 0.15253910021239622, "grad_norm": 1.4650853872299194, "learning_rate": 7.625482625482626e-05, "loss": 0.7063, "step": 790 }, { "epoch": 0.1527321876810195, "grad_norm": 0.9995442628860474, "learning_rate": 7.635135135135135e-05, "loss": 0.6932, "step": 791 }, { "epoch": 0.1529252751496428, "grad_norm": 1.2757855653762817, "learning_rate": 7.644787644787645e-05, "loss": 0.7581, "step": 792 }, { "epoch": 0.15311836261826609, "grad_norm": 1.4017407894134521, "learning_rate": 7.654440154440155e-05, "loss": 0.7366, "step": 793 }, { "epoch": 0.15331145008688937, "grad_norm": 1.7690349817276, "learning_rate": 7.664092664092664e-05, "loss": 0.7081, "step": 794 }, { "epoch": 0.15350453755551266, "grad_norm": 1.6864867210388184, "learning_rate": 7.673745173745173e-05, "loss": 0.6777, "step": 795 }, { "epoch": 0.15369762502413592, "grad_norm": 1.5688890218734741, "learning_rate": 7.683397683397684e-05, "loss": 0.7186, "step": 796 }, { "epoch": 0.1538907124927592, "grad_norm": 1.2896614074707031, "learning_rate": 7.693050193050193e-05, "loss": 0.7619, "step": 797 }, { "epoch": 0.1540837999613825, "grad_norm": 1.206705927848816, "learning_rate": 7.702702702702703e-05, "loss": 0.6886, "step": 798 }, { "epoch": 0.15427688743000578, "grad_norm": 1.6668349504470825, "learning_rate": 7.712355212355212e-05, "loss": 0.7095, "step": 799 }, { "epoch": 0.15446997489862907, "grad_norm": 1.2667540311813354, "learning_rate": 7.722007722007723e-05, "loss": 0.7272, "step": 800 }, { "epoch": 0.15466306236725236, "grad_norm": 1.140042781829834, "learning_rate": 7.731660231660232e-05, "loss": 0.7045, "step": 801 }, { "epoch": 0.15485614983587564, "grad_norm": 1.0924144983291626, "learning_rate": 7.741312741312741e-05, "loss": 0.6722, "step": 802 }, { "epoch": 0.15504923730449893, "grad_norm": 0.9751585125923157, "learning_rate": 7.75096525096525e-05, "loss": 0.667, "step": 803 }, { "epoch": 0.15524232477312222, "grad_norm": 1.2220195531845093, "learning_rate": 7.760617760617761e-05, "loss": 0.6921, "step": 804 }, { "epoch": 0.1554354122417455, "grad_norm": 0.9560672640800476, "learning_rate": 7.77027027027027e-05, "loss": 0.7358, "step": 805 }, { "epoch": 0.1556284997103688, "grad_norm": 1.0104918479919434, "learning_rate": 7.779922779922781e-05, "loss": 0.6612, "step": 806 }, { "epoch": 0.15582158717899208, "grad_norm": 1.1506626605987549, "learning_rate": 7.78957528957529e-05, "loss": 0.7304, "step": 807 }, { "epoch": 0.15601467464761537, "grad_norm": 1.2981065511703491, "learning_rate": 7.7992277992278e-05, "loss": 0.7273, "step": 808 }, { "epoch": 0.15620776211623866, "grad_norm": 1.7871519327163696, "learning_rate": 7.80888030888031e-05, "loss": 0.7648, "step": 809 }, { "epoch": 0.15640084958486195, "grad_norm": 2.8502984046936035, "learning_rate": 7.81853281853282e-05, "loss": 0.7263, "step": 810 }, { "epoch": 0.15659393705348523, "grad_norm": 1.0648274421691895, "learning_rate": 7.828185328185329e-05, "loss": 0.6581, "step": 811 }, { "epoch": 0.15678702452210852, "grad_norm": 1.2515504360198975, "learning_rate": 7.837837837837838e-05, "loss": 0.7051, "step": 812 }, { "epoch": 0.1569801119907318, "grad_norm": 1.6279027462005615, "learning_rate": 7.847490347490349e-05, "loss": 0.7256, "step": 813 }, { "epoch": 0.1571731994593551, "grad_norm": 1.036463975906372, "learning_rate": 7.857142857142858e-05, "loss": 0.7352, "step": 814 }, { "epoch": 0.15736628692797838, "grad_norm": 0.8323183655738831, "learning_rate": 7.866795366795367e-05, "loss": 0.7369, "step": 815 }, { "epoch": 0.15755937439660167, "grad_norm": 1.7882062196731567, "learning_rate": 7.876447876447877e-05, "loss": 0.6593, "step": 816 }, { "epoch": 0.15775246186522496, "grad_norm": 1.3838016986846924, "learning_rate": 7.886100386100387e-05, "loss": 0.8315, "step": 817 }, { "epoch": 0.15794554933384825, "grad_norm": 0.8460897207260132, "learning_rate": 7.895752895752897e-05, "loss": 0.7127, "step": 818 }, { "epoch": 0.15813863680247153, "grad_norm": 1.308506965637207, "learning_rate": 7.905405405405406e-05, "loss": 0.6802, "step": 819 }, { "epoch": 0.1583317242710948, "grad_norm": 1.2187436819076538, "learning_rate": 7.915057915057915e-05, "loss": 0.6841, "step": 820 }, { "epoch": 0.15852481173971808, "grad_norm": 2.4532928466796875, "learning_rate": 7.924710424710426e-05, "loss": 0.6766, "step": 821 }, { "epoch": 0.15871789920834137, "grad_norm": 0.7195900082588196, "learning_rate": 7.934362934362935e-05, "loss": 0.6708, "step": 822 }, { "epoch": 0.15891098667696466, "grad_norm": 1.1829057931900024, "learning_rate": 7.944015444015444e-05, "loss": 0.7771, "step": 823 }, { "epoch": 0.15910407414558794, "grad_norm": 0.8561239838600159, "learning_rate": 7.953667953667954e-05, "loss": 0.6785, "step": 824 }, { "epoch": 0.15929716161421123, "grad_norm": 0.9020392298698425, "learning_rate": 7.963320463320464e-05, "loss": 0.7286, "step": 825 }, { "epoch": 0.15949024908283452, "grad_norm": 1.0766303539276123, "learning_rate": 7.972972972972974e-05, "loss": 0.6798, "step": 826 }, { "epoch": 0.1596833365514578, "grad_norm": 0.8791629076004028, "learning_rate": 7.982625482625483e-05, "loss": 0.7143, "step": 827 }, { "epoch": 0.1598764240200811, "grad_norm": 0.8124934434890747, "learning_rate": 7.992277992277992e-05, "loss": 0.7271, "step": 828 }, { "epoch": 0.16006951148870438, "grad_norm": 1.1277226209640503, "learning_rate": 8.001930501930503e-05, "loss": 0.6571, "step": 829 }, { "epoch": 0.16026259895732767, "grad_norm": 0.9750187397003174, "learning_rate": 8.011583011583012e-05, "loss": 0.7044, "step": 830 }, { "epoch": 0.16045568642595096, "grad_norm": 1.0004538297653198, "learning_rate": 8.021235521235521e-05, "loss": 0.6559, "step": 831 }, { "epoch": 0.16064877389457424, "grad_norm": 0.8606877326965332, "learning_rate": 8.03088803088803e-05, "loss": 0.7136, "step": 832 }, { "epoch": 0.16084186136319753, "grad_norm": 1.4239130020141602, "learning_rate": 8.040540540540541e-05, "loss": 0.7156, "step": 833 }, { "epoch": 0.16103494883182082, "grad_norm": 0.8441488146781921, "learning_rate": 8.05019305019305e-05, "loss": 0.79, "step": 834 }, { "epoch": 0.1612280363004441, "grad_norm": 0.9555944800376892, "learning_rate": 8.05984555984556e-05, "loss": 0.731, "step": 835 }, { "epoch": 0.1614211237690674, "grad_norm": 1.5624685287475586, "learning_rate": 8.06949806949807e-05, "loss": 0.6493, "step": 836 }, { "epoch": 0.16161421123769068, "grad_norm": 0.6269006133079529, "learning_rate": 8.07915057915058e-05, "loss": 0.7, "step": 837 }, { "epoch": 0.16180729870631397, "grad_norm": 0.8831353783607483, "learning_rate": 8.088803088803089e-05, "loss": 0.7467, "step": 838 }, { "epoch": 0.16200038617493726, "grad_norm": 0.7098029255867004, "learning_rate": 8.098455598455598e-05, "loss": 0.7072, "step": 839 }, { "epoch": 0.16219347364356054, "grad_norm": 0.9858813285827637, "learning_rate": 8.108108108108109e-05, "loss": 0.7334, "step": 840 }, { "epoch": 0.16238656111218383, "grad_norm": 1.0055242776870728, "learning_rate": 8.117760617760618e-05, "loss": 0.7548, "step": 841 }, { "epoch": 0.16257964858080712, "grad_norm": 2.1536641120910645, "learning_rate": 8.127413127413128e-05, "loss": 0.6975, "step": 842 }, { "epoch": 0.1627727360494304, "grad_norm": 0.8294567465782166, "learning_rate": 8.137065637065637e-05, "loss": 0.8299, "step": 843 }, { "epoch": 0.16296582351805367, "grad_norm": 1.1222862005233765, "learning_rate": 8.146718146718148e-05, "loss": 0.7249, "step": 844 }, { "epoch": 0.16315891098667695, "grad_norm": 1.1220173835754395, "learning_rate": 8.156370656370657e-05, "loss": 0.7018, "step": 845 }, { "epoch": 0.16335199845530024, "grad_norm": 1.0001733303070068, "learning_rate": 8.166023166023166e-05, "loss": 0.7055, "step": 846 }, { "epoch": 0.16354508592392353, "grad_norm": 2.755784034729004, "learning_rate": 8.175675675675675e-05, "loss": 0.6897, "step": 847 }, { "epoch": 0.16373817339254682, "grad_norm": 0.7910317182540894, "learning_rate": 8.185328185328186e-05, "loss": 0.649, "step": 848 }, { "epoch": 0.1639312608611701, "grad_norm": 0.6784765720367432, "learning_rate": 8.194980694980695e-05, "loss": 0.7617, "step": 849 }, { "epoch": 0.1641243483297934, "grad_norm": 0.8339208960533142, "learning_rate": 8.204633204633205e-05, "loss": 0.719, "step": 850 }, { "epoch": 0.16431743579841668, "grad_norm": 0.9850757122039795, "learning_rate": 8.214285714285714e-05, "loss": 0.7452, "step": 851 }, { "epoch": 0.16451052326703997, "grad_norm": 1.3591580390930176, "learning_rate": 8.223938223938225e-05, "loss": 0.7684, "step": 852 }, { "epoch": 0.16470361073566325, "grad_norm": 0.8805522918701172, "learning_rate": 8.233590733590734e-05, "loss": 0.6384, "step": 853 }, { "epoch": 0.16489669820428654, "grad_norm": 0.6965591907501221, "learning_rate": 8.243243243243243e-05, "loss": 0.697, "step": 854 }, { "epoch": 0.16508978567290983, "grad_norm": 0.9788227081298828, "learning_rate": 8.252895752895752e-05, "loss": 0.6862, "step": 855 }, { "epoch": 0.16528287314153312, "grad_norm": 0.8783449530601501, "learning_rate": 8.262548262548263e-05, "loss": 0.6605, "step": 856 }, { "epoch": 0.1654759606101564, "grad_norm": 0.8783519268035889, "learning_rate": 8.272200772200772e-05, "loss": 0.7132, "step": 857 }, { "epoch": 0.1656690480787797, "grad_norm": 1.1397087574005127, "learning_rate": 8.281853281853282e-05, "loss": 0.6953, "step": 858 }, { "epoch": 0.16586213554740298, "grad_norm": 0.7612053751945496, "learning_rate": 8.291505791505791e-05, "loss": 0.7196, "step": 859 }, { "epoch": 0.16605522301602627, "grad_norm": 0.6001445651054382, "learning_rate": 8.301158301158302e-05, "loss": 0.7529, "step": 860 }, { "epoch": 0.16624831048464955, "grad_norm": 0.8902844190597534, "learning_rate": 8.310810810810811e-05, "loss": 0.7329, "step": 861 }, { "epoch": 0.16644139795327284, "grad_norm": 0.8631682991981506, "learning_rate": 8.32046332046332e-05, "loss": 0.6389, "step": 862 }, { "epoch": 0.16663448542189613, "grad_norm": 0.7156873345375061, "learning_rate": 8.33011583011583e-05, "loss": 0.7132, "step": 863 }, { "epoch": 0.16682757289051942, "grad_norm": 0.8356594443321228, "learning_rate": 8.33976833976834e-05, "loss": 0.7587, "step": 864 }, { "epoch": 0.1670206603591427, "grad_norm": 0.7552937865257263, "learning_rate": 8.349420849420851e-05, "loss": 0.6984, "step": 865 }, { "epoch": 0.167213747827766, "grad_norm": 0.651897132396698, "learning_rate": 8.35907335907336e-05, "loss": 0.6307, "step": 866 }, { "epoch": 0.16740683529638928, "grad_norm": 0.7253302335739136, "learning_rate": 8.36872586872587e-05, "loss": 0.7543, "step": 867 }, { "epoch": 0.16759992276501254, "grad_norm": 0.8704149723052979, "learning_rate": 8.378378378378379e-05, "loss": 0.7441, "step": 868 }, { "epoch": 0.16779301023363583, "grad_norm": 0.9231045246124268, "learning_rate": 8.388030888030889e-05, "loss": 0.7176, "step": 869 }, { "epoch": 0.1679860977022591, "grad_norm": 0.8094666600227356, "learning_rate": 8.397683397683399e-05, "loss": 0.6923, "step": 870 }, { "epoch": 0.1681791851708824, "grad_norm": 2.145695686340332, "learning_rate": 8.407335907335908e-05, "loss": 0.6788, "step": 871 }, { "epoch": 0.1683722726395057, "grad_norm": 0.8072236180305481, "learning_rate": 8.416988416988417e-05, "loss": 0.803, "step": 872 }, { "epoch": 0.16856536010812898, "grad_norm": 1.13296639919281, "learning_rate": 8.426640926640928e-05, "loss": 0.6789, "step": 873 }, { "epoch": 0.16875844757675226, "grad_norm": 0.7761037349700928, "learning_rate": 8.436293436293437e-05, "loss": 0.7035, "step": 874 }, { "epoch": 0.16895153504537555, "grad_norm": 1.0782502889633179, "learning_rate": 8.445945945945946e-05, "loss": 0.7273, "step": 875 }, { "epoch": 0.16914462251399884, "grad_norm": 0.8474161624908447, "learning_rate": 8.455598455598456e-05, "loss": 0.7288, "step": 876 }, { "epoch": 0.16933770998262213, "grad_norm": 1.0149093866348267, "learning_rate": 8.465250965250966e-05, "loss": 0.6652, "step": 877 }, { "epoch": 0.1695307974512454, "grad_norm": 1.0075496435165405, "learning_rate": 8.474903474903476e-05, "loss": 0.7065, "step": 878 }, { "epoch": 0.1697238849198687, "grad_norm": 1.4156596660614014, "learning_rate": 8.484555984555985e-05, "loss": 0.6923, "step": 879 }, { "epoch": 0.169916972388492, "grad_norm": 1.9667307138442993, "learning_rate": 8.494208494208494e-05, "loss": 0.687, "step": 880 }, { "epoch": 0.17011005985711528, "grad_norm": 0.9786198735237122, "learning_rate": 8.503861003861005e-05, "loss": 0.6716, "step": 881 }, { "epoch": 0.17030314732573856, "grad_norm": 0.8741691708564758, "learning_rate": 8.513513513513514e-05, "loss": 0.6747, "step": 882 }, { "epoch": 0.17049623479436185, "grad_norm": 1.0909062623977661, "learning_rate": 8.523166023166023e-05, "loss": 0.6978, "step": 883 }, { "epoch": 0.17068932226298514, "grad_norm": 1.0707000494003296, "learning_rate": 8.532818532818533e-05, "loss": 0.6814, "step": 884 }, { "epoch": 0.17088240973160843, "grad_norm": 1.0156493186950684, "learning_rate": 8.542471042471043e-05, "loss": 0.6541, "step": 885 }, { "epoch": 0.1710754972002317, "grad_norm": 1.094224452972412, "learning_rate": 8.552123552123553e-05, "loss": 0.6937, "step": 886 }, { "epoch": 0.171268584668855, "grad_norm": 3.314046621322632, "learning_rate": 8.561776061776062e-05, "loss": 0.7035, "step": 887 }, { "epoch": 0.1714616721374783, "grad_norm": 1.0937196016311646, "learning_rate": 8.571428571428571e-05, "loss": 0.6475, "step": 888 }, { "epoch": 0.17165475960610158, "grad_norm": 0.662585437297821, "learning_rate": 8.581081081081082e-05, "loss": 0.7123, "step": 889 }, { "epoch": 0.17184784707472486, "grad_norm": 0.9477655291557312, "learning_rate": 8.590733590733591e-05, "loss": 0.7159, "step": 890 }, { "epoch": 0.17204093454334812, "grad_norm": 1.6124012470245361, "learning_rate": 8.6003861003861e-05, "loss": 0.6622, "step": 891 }, { "epoch": 0.1722340220119714, "grad_norm": 2.981684923171997, "learning_rate": 8.61003861003861e-05, "loss": 0.7063, "step": 892 }, { "epoch": 0.1724271094805947, "grad_norm": 1.0245795249938965, "learning_rate": 8.61969111969112e-05, "loss": 0.777, "step": 893 }, { "epoch": 0.17262019694921799, "grad_norm": 4.556847095489502, "learning_rate": 8.62934362934363e-05, "loss": 0.723, "step": 894 }, { "epoch": 0.17281328441784127, "grad_norm": 0.8087945580482483, "learning_rate": 8.638996138996139e-05, "loss": 0.7956, "step": 895 }, { "epoch": 0.17300637188646456, "grad_norm": 1.1286685466766357, "learning_rate": 8.64864864864865e-05, "loss": 0.6563, "step": 896 }, { "epoch": 0.17319945935508785, "grad_norm": 1.4172626733779907, "learning_rate": 8.658301158301159e-05, "loss": 0.7049, "step": 897 }, { "epoch": 0.17339254682371114, "grad_norm": 10.465795516967773, "learning_rate": 8.667953667953668e-05, "loss": 0.7024, "step": 898 }, { "epoch": 0.17358563429233442, "grad_norm": 0.8399300575256348, "learning_rate": 8.677606177606178e-05, "loss": 0.7052, "step": 899 }, { "epoch": 0.1737787217609577, "grad_norm": 0.8643333315849304, "learning_rate": 8.687258687258688e-05, "loss": 0.672, "step": 900 }, { "epoch": 0.173971809229581, "grad_norm": 0.8640925288200378, "learning_rate": 8.696911196911197e-05, "loss": 0.7168, "step": 901 }, { "epoch": 0.17416489669820429, "grad_norm": 0.821960985660553, "learning_rate": 8.706563706563707e-05, "loss": 0.7221, "step": 902 }, { "epoch": 0.17435798416682757, "grad_norm": 0.9168650507926941, "learning_rate": 8.716216216216216e-05, "loss": 0.6995, "step": 903 }, { "epoch": 0.17455107163545086, "grad_norm": 0.8112331032752991, "learning_rate": 8.725868725868727e-05, "loss": 0.7209, "step": 904 }, { "epoch": 0.17474415910407415, "grad_norm": 1.154521107673645, "learning_rate": 8.735521235521236e-05, "loss": 0.6471, "step": 905 }, { "epoch": 0.17493724657269744, "grad_norm": 1.007033348083496, "learning_rate": 8.745173745173745e-05, "loss": 0.7623, "step": 906 }, { "epoch": 0.17513033404132072, "grad_norm": 0.9781859517097473, "learning_rate": 8.754826254826255e-05, "loss": 0.6786, "step": 907 }, { "epoch": 0.175323421509944, "grad_norm": 1.0999293327331543, "learning_rate": 8.764478764478765e-05, "loss": 0.6676, "step": 908 }, { "epoch": 0.1755165089785673, "grad_norm": 0.788378894329071, "learning_rate": 8.774131274131274e-05, "loss": 0.7282, "step": 909 }, { "epoch": 0.17570959644719059, "grad_norm": 1.9941655397415161, "learning_rate": 8.783783783783784e-05, "loss": 0.6634, "step": 910 }, { "epoch": 0.17590268391581387, "grad_norm": 0.9104443788528442, "learning_rate": 8.793436293436293e-05, "loss": 0.6937, "step": 911 }, { "epoch": 0.17609577138443716, "grad_norm": 0.8452280163764954, "learning_rate": 8.803088803088804e-05, "loss": 0.697, "step": 912 }, { "epoch": 0.17628885885306045, "grad_norm": 0.8371540904045105, "learning_rate": 8.812741312741313e-05, "loss": 0.7092, "step": 913 }, { "epoch": 0.17648194632168374, "grad_norm": 0.8217237591743469, "learning_rate": 8.822393822393822e-05, "loss": 0.7056, "step": 914 }, { "epoch": 0.176675033790307, "grad_norm": 0.9251959919929504, "learning_rate": 8.832046332046332e-05, "loss": 0.7487, "step": 915 }, { "epoch": 0.17686812125893028, "grad_norm": 1.1947638988494873, "learning_rate": 8.841698841698842e-05, "loss": 0.6829, "step": 916 }, { "epoch": 0.17706120872755357, "grad_norm": 7.928412437438965, "learning_rate": 8.851351351351352e-05, "loss": 0.7165, "step": 917 }, { "epoch": 0.17725429619617686, "grad_norm": 0.9985291957855225, "learning_rate": 8.861003861003861e-05, "loss": 0.7242, "step": 918 }, { "epoch": 0.17744738366480015, "grad_norm": 0.8369446992874146, "learning_rate": 8.87065637065637e-05, "loss": 0.6929, "step": 919 }, { "epoch": 0.17764047113342343, "grad_norm": 1.0385513305664062, "learning_rate": 8.880308880308881e-05, "loss": 0.6786, "step": 920 }, { "epoch": 0.17783355860204672, "grad_norm": 0.9853262305259705, "learning_rate": 8.889961389961391e-05, "loss": 0.7429, "step": 921 }, { "epoch": 0.17802664607067, "grad_norm": 0.7300809025764465, "learning_rate": 8.899613899613901e-05, "loss": 0.7172, "step": 922 }, { "epoch": 0.1782197335392933, "grad_norm": 1.6370866298675537, "learning_rate": 8.90926640926641e-05, "loss": 0.6964, "step": 923 }, { "epoch": 0.17841282100791658, "grad_norm": 0.8653240203857422, "learning_rate": 8.918918918918919e-05, "loss": 0.7151, "step": 924 }, { "epoch": 0.17860590847653987, "grad_norm": 0.8798837065696716, "learning_rate": 8.92857142857143e-05, "loss": 0.6953, "step": 925 }, { "epoch": 0.17879899594516316, "grad_norm": 0.792323112487793, "learning_rate": 8.938223938223939e-05, "loss": 0.7298, "step": 926 }, { "epoch": 0.17899208341378645, "grad_norm": 0.7897618412971497, "learning_rate": 8.947876447876449e-05, "loss": 0.6776, "step": 927 }, { "epoch": 0.17918517088240973, "grad_norm": 0.9246671795845032, "learning_rate": 8.957528957528958e-05, "loss": 0.7374, "step": 928 }, { "epoch": 0.17937825835103302, "grad_norm": 0.7918031811714172, "learning_rate": 8.967181467181468e-05, "loss": 0.7475, "step": 929 }, { "epoch": 0.1795713458196563, "grad_norm": 1.083585500717163, "learning_rate": 8.976833976833978e-05, "loss": 0.615, "step": 930 }, { "epoch": 0.1797644332882796, "grad_norm": 0.6776149868965149, "learning_rate": 8.986486486486487e-05, "loss": 0.6521, "step": 931 }, { "epoch": 0.17995752075690288, "grad_norm": 0.6797517538070679, "learning_rate": 8.996138996138996e-05, "loss": 0.7021, "step": 932 }, { "epoch": 0.18015060822552617, "grad_norm": 2.214228868484497, "learning_rate": 9.005791505791507e-05, "loss": 0.6743, "step": 933 }, { "epoch": 0.18034369569414946, "grad_norm": 0.8201547265052795, "learning_rate": 9.015444015444016e-05, "loss": 0.7429, "step": 934 }, { "epoch": 0.18053678316277275, "grad_norm": 0.7453049421310425, "learning_rate": 9.025096525096526e-05, "loss": 0.7223, "step": 935 }, { "epoch": 0.18072987063139603, "grad_norm": 0.6878924369812012, "learning_rate": 9.034749034749035e-05, "loss": 0.7149, "step": 936 }, { "epoch": 0.18092295810001932, "grad_norm": 0.7195442914962769, "learning_rate": 9.044401544401545e-05, "loss": 0.7131, "step": 937 }, { "epoch": 0.1811160455686426, "grad_norm": 1.157145380973816, "learning_rate": 9.054054054054055e-05, "loss": 0.6957, "step": 938 }, { "epoch": 0.18130913303726587, "grad_norm": 3.6909892559051514, "learning_rate": 9.063706563706564e-05, "loss": 0.6334, "step": 939 }, { "epoch": 0.18150222050588916, "grad_norm": 0.7897803783416748, "learning_rate": 9.073359073359073e-05, "loss": 0.6954, "step": 940 }, { "epoch": 0.18169530797451244, "grad_norm": 0.779519259929657, "learning_rate": 9.083011583011584e-05, "loss": 0.7285, "step": 941 }, { "epoch": 0.18188839544313573, "grad_norm": 0.9331222772598267, "learning_rate": 9.092664092664093e-05, "loss": 0.7112, "step": 942 }, { "epoch": 0.18208148291175902, "grad_norm": 2.9930367469787598, "learning_rate": 9.102316602316603e-05, "loss": 0.6886, "step": 943 }, { "epoch": 0.1822745703803823, "grad_norm": 0.9092720150947571, "learning_rate": 9.111969111969112e-05, "loss": 0.7064, "step": 944 }, { "epoch": 0.1824676578490056, "grad_norm": 1.070980429649353, "learning_rate": 9.121621621621623e-05, "loss": 0.719, "step": 945 }, { "epoch": 0.18266074531762888, "grad_norm": 0.7393388152122498, "learning_rate": 9.131274131274132e-05, "loss": 0.7032, "step": 946 }, { "epoch": 0.18285383278625217, "grad_norm": 0.7857952117919922, "learning_rate": 9.140926640926641e-05, "loss": 0.739, "step": 947 }, { "epoch": 0.18304692025487546, "grad_norm": 1.3407809734344482, "learning_rate": 9.15057915057915e-05, "loss": 0.652, "step": 948 }, { "epoch": 0.18324000772349874, "grad_norm": 0.731354832649231, "learning_rate": 9.160231660231661e-05, "loss": 0.6635, "step": 949 }, { "epoch": 0.18343309519212203, "grad_norm": 0.6672086715698242, "learning_rate": 9.16988416988417e-05, "loss": 0.6779, "step": 950 }, { "epoch": 0.18362618266074532, "grad_norm": 0.7401121258735657, "learning_rate": 9.17953667953668e-05, "loss": 0.7283, "step": 951 }, { "epoch": 0.1838192701293686, "grad_norm": 0.7184818983078003, "learning_rate": 9.18918918918919e-05, "loss": 0.6572, "step": 952 }, { "epoch": 0.1840123575979919, "grad_norm": 1.2226852178573608, "learning_rate": 9.1988416988417e-05, "loss": 0.651, "step": 953 }, { "epoch": 0.18420544506661518, "grad_norm": 1.119523286819458, "learning_rate": 9.208494208494209e-05, "loss": 0.7933, "step": 954 }, { "epoch": 0.18439853253523847, "grad_norm": 1.075939655303955, "learning_rate": 9.218146718146718e-05, "loss": 0.6149, "step": 955 }, { "epoch": 0.18459162000386176, "grad_norm": 0.732899010181427, "learning_rate": 9.227799227799229e-05, "loss": 0.6509, "step": 956 }, { "epoch": 0.18478470747248504, "grad_norm": 0.793919026851654, "learning_rate": 9.237451737451738e-05, "loss": 0.693, "step": 957 }, { "epoch": 0.18497779494110833, "grad_norm": 0.8650076985359192, "learning_rate": 9.247104247104247e-05, "loss": 0.7068, "step": 958 }, { "epoch": 0.18517088240973162, "grad_norm": 1.1162205934524536, "learning_rate": 9.256756756756757e-05, "loss": 0.6627, "step": 959 }, { "epoch": 0.1853639698783549, "grad_norm": 0.9702245593070984, "learning_rate": 9.266409266409267e-05, "loss": 0.6808, "step": 960 }, { "epoch": 0.1855570573469782, "grad_norm": 0.7826595306396484, "learning_rate": 9.276061776061777e-05, "loss": 0.7414, "step": 961 }, { "epoch": 0.18575014481560148, "grad_norm": 0.8053584098815918, "learning_rate": 9.285714285714286e-05, "loss": 0.751, "step": 962 }, { "epoch": 0.18594323228422474, "grad_norm": 0.6173267364501953, "learning_rate": 9.295366795366795e-05, "loss": 0.7103, "step": 963 }, { "epoch": 0.18613631975284803, "grad_norm": 0.6908634305000305, "learning_rate": 9.305019305019306e-05, "loss": 0.6714, "step": 964 }, { "epoch": 0.18632940722147132, "grad_norm": 0.6428989171981812, "learning_rate": 9.314671814671815e-05, "loss": 0.7262, "step": 965 }, { "epoch": 0.1865224946900946, "grad_norm": 0.820885181427002, "learning_rate": 9.324324324324324e-05, "loss": 0.7282, "step": 966 }, { "epoch": 0.1867155821587179, "grad_norm": 0.8906183242797852, "learning_rate": 9.333976833976834e-05, "loss": 0.7504, "step": 967 }, { "epoch": 0.18690866962734118, "grad_norm": 0.7172391414642334, "learning_rate": 9.343629343629344e-05, "loss": 0.6691, "step": 968 }, { "epoch": 0.18710175709596447, "grad_norm": 1.1892720460891724, "learning_rate": 9.353281853281854e-05, "loss": 0.7276, "step": 969 }, { "epoch": 0.18729484456458775, "grad_norm": 0.74247145652771, "learning_rate": 9.362934362934363e-05, "loss": 0.749, "step": 970 }, { "epoch": 0.18748793203321104, "grad_norm": 0.8118295073509216, "learning_rate": 9.372586872586872e-05, "loss": 0.7143, "step": 971 }, { "epoch": 0.18768101950183433, "grad_norm": 1.4219880104064941, "learning_rate": 9.382239382239383e-05, "loss": 0.7084, "step": 972 }, { "epoch": 0.18787410697045762, "grad_norm": 0.7963294982910156, "learning_rate": 9.391891891891892e-05, "loss": 0.6959, "step": 973 }, { "epoch": 0.1880671944390809, "grad_norm": 1.458555817604065, "learning_rate": 9.401544401544401e-05, "loss": 0.7062, "step": 974 }, { "epoch": 0.1882602819077042, "grad_norm": 0.8623759746551514, "learning_rate": 9.411196911196911e-05, "loss": 0.7205, "step": 975 }, { "epoch": 0.18845336937632748, "grad_norm": 2.061478853225708, "learning_rate": 9.420849420849421e-05, "loss": 0.6644, "step": 976 }, { "epoch": 0.18864645684495077, "grad_norm": 0.7694371342658997, "learning_rate": 9.43050193050193e-05, "loss": 0.6994, "step": 977 }, { "epoch": 0.18883954431357405, "grad_norm": 1.2149608135223389, "learning_rate": 9.44015444015444e-05, "loss": 0.6961, "step": 978 }, { "epoch": 0.18903263178219734, "grad_norm": 0.5896968841552734, "learning_rate": 9.44980694980695e-05, "loss": 0.771, "step": 979 }, { "epoch": 0.18922571925082063, "grad_norm": 0.6815376877784729, "learning_rate": 9.45945945945946e-05, "loss": 0.7292, "step": 980 }, { "epoch": 0.18941880671944392, "grad_norm": 0.9844634532928467, "learning_rate": 9.46911196911197e-05, "loss": 0.6962, "step": 981 }, { "epoch": 0.1896118941880672, "grad_norm": 0.6530974507331848, "learning_rate": 9.47876447876448e-05, "loss": 0.7338, "step": 982 }, { "epoch": 0.1898049816566905, "grad_norm": 1.176620364189148, "learning_rate": 9.488416988416989e-05, "loss": 0.7005, "step": 983 }, { "epoch": 0.18999806912531378, "grad_norm": 0.7119463682174683, "learning_rate": 9.498069498069498e-05, "loss": 0.7243, "step": 984 }, { "epoch": 0.19019115659393707, "grad_norm": 0.8077830076217651, "learning_rate": 9.507722007722009e-05, "loss": 0.6924, "step": 985 }, { "epoch": 0.19038424406256033, "grad_norm": 0.9409700632095337, "learning_rate": 9.517374517374518e-05, "loss": 0.666, "step": 986 }, { "epoch": 0.1905773315311836, "grad_norm": 0.7381781935691833, "learning_rate": 9.527027027027028e-05, "loss": 0.7443, "step": 987 }, { "epoch": 0.1907704189998069, "grad_norm": 0.5553953051567078, "learning_rate": 9.536679536679537e-05, "loss": 0.7697, "step": 988 }, { "epoch": 0.1909635064684302, "grad_norm": 0.5811437368392944, "learning_rate": 9.546332046332048e-05, "loss": 0.7087, "step": 989 }, { "epoch": 0.19115659393705348, "grad_norm": 1.273707628250122, "learning_rate": 9.555984555984557e-05, "loss": 0.7301, "step": 990 }, { "epoch": 0.19134968140567676, "grad_norm": 0.6765920519828796, "learning_rate": 9.565637065637066e-05, "loss": 0.6733, "step": 991 }, { "epoch": 0.19154276887430005, "grad_norm": 0.6986637115478516, "learning_rate": 9.575289575289575e-05, "loss": 0.7191, "step": 992 }, { "epoch": 0.19173585634292334, "grad_norm": 1.343916416168213, "learning_rate": 9.584942084942086e-05, "loss": 0.7378, "step": 993 }, { "epoch": 0.19192894381154663, "grad_norm": 1.0831619501113892, "learning_rate": 9.594594594594595e-05, "loss": 0.6781, "step": 994 }, { "epoch": 0.1921220312801699, "grad_norm": 0.5733482241630554, "learning_rate": 9.604247104247105e-05, "loss": 0.6829, "step": 995 }, { "epoch": 0.1923151187487932, "grad_norm": 0.702114462852478, "learning_rate": 9.613899613899614e-05, "loss": 0.7433, "step": 996 }, { "epoch": 0.1925082062174165, "grad_norm": 2.01216983795166, "learning_rate": 9.623552123552125e-05, "loss": 0.7081, "step": 997 }, { "epoch": 0.19270129368603978, "grad_norm": 0.5673854351043701, "learning_rate": 9.633204633204634e-05, "loss": 0.694, "step": 998 }, { "epoch": 0.19289438115466306, "grad_norm": 0.5474029779434204, "learning_rate": 9.642857142857143e-05, "loss": 0.6749, "step": 999 }, { "epoch": 0.19308746862328635, "grad_norm": 9.80802059173584, "learning_rate": 9.652509652509652e-05, "loss": 0.7819, "step": 1000 }, { "epoch": 0.19308746862328635, "eval_loss": 0.7471511363983154, "eval_runtime": 68.1286, "eval_samples_per_second": 9.746, "eval_steps_per_second": 0.308, "step": 1000 }, { "epoch": 0.19328055609190964, "grad_norm": 0.7514103651046753, "learning_rate": 9.662162162162163e-05, "loss": 0.7809, "step": 1001 }, { "epoch": 0.19347364356053293, "grad_norm": 0.8476603031158447, "learning_rate": 9.671814671814672e-05, "loss": 0.6523, "step": 1002 }, { "epoch": 0.1936667310291562, "grad_norm": 0.73158198595047, "learning_rate": 9.681467181467182e-05, "loss": 0.7521, "step": 1003 }, { "epoch": 0.1938598184977795, "grad_norm": 0.7130540609359741, "learning_rate": 9.691119691119691e-05, "loss": 0.7372, "step": 1004 }, { "epoch": 0.1940529059664028, "grad_norm": 0.6814699172973633, "learning_rate": 9.700772200772202e-05, "loss": 0.6991, "step": 1005 }, { "epoch": 0.19424599343502608, "grad_norm": 0.625819206237793, "learning_rate": 9.710424710424711e-05, "loss": 0.676, "step": 1006 }, { "epoch": 0.19443908090364936, "grad_norm": 0.9465999007225037, "learning_rate": 9.72007722007722e-05, "loss": 0.7325, "step": 1007 }, { "epoch": 0.19463216837227265, "grad_norm": 1.4684042930603027, "learning_rate": 9.729729729729731e-05, "loss": 0.6855, "step": 1008 }, { "epoch": 0.19482525584089594, "grad_norm": 0.7536947131156921, "learning_rate": 9.73938223938224e-05, "loss": 0.7638, "step": 1009 }, { "epoch": 0.1950183433095192, "grad_norm": 0.7226930260658264, "learning_rate": 9.74903474903475e-05, "loss": 0.6291, "step": 1010 }, { "epoch": 0.1952114307781425, "grad_norm": 0.6764536499977112, "learning_rate": 9.758687258687259e-05, "loss": 0.6932, "step": 1011 }, { "epoch": 0.19540451824676577, "grad_norm": 0.8701961636543274, "learning_rate": 9.76833976833977e-05, "loss": 0.7646, "step": 1012 }, { "epoch": 0.19559760571538906, "grad_norm": 0.7298441529273987, "learning_rate": 9.777992277992279e-05, "loss": 0.6428, "step": 1013 }, { "epoch": 0.19579069318401235, "grad_norm": 0.593739926815033, "learning_rate": 9.787644787644788e-05, "loss": 0.6831, "step": 1014 }, { "epoch": 0.19598378065263564, "grad_norm": 0.8013108372688293, "learning_rate": 9.797297297297297e-05, "loss": 0.7199, "step": 1015 }, { "epoch": 0.19617686812125892, "grad_norm": 0.9249262809753418, "learning_rate": 9.806949806949808e-05, "loss": 0.6585, "step": 1016 }, { "epoch": 0.1963699555898822, "grad_norm": 0.7745546698570251, "learning_rate": 9.816602316602317e-05, "loss": 0.7424, "step": 1017 }, { "epoch": 0.1965630430585055, "grad_norm": 0.690052330493927, "learning_rate": 9.826254826254826e-05, "loss": 0.7159, "step": 1018 }, { "epoch": 0.1967561305271288, "grad_norm": 0.6928214430809021, "learning_rate": 9.835907335907336e-05, "loss": 0.6928, "step": 1019 }, { "epoch": 0.19694921799575207, "grad_norm": 0.6822926998138428, "learning_rate": 9.845559845559846e-05, "loss": 0.6668, "step": 1020 }, { "epoch": 0.19714230546437536, "grad_norm": 0.9410924911499023, "learning_rate": 9.855212355212356e-05, "loss": 0.7383, "step": 1021 }, { "epoch": 0.19733539293299865, "grad_norm": 0.7106058597564697, "learning_rate": 9.864864864864865e-05, "loss": 0.6839, "step": 1022 }, { "epoch": 0.19752848040162194, "grad_norm": 0.7438758015632629, "learning_rate": 9.874517374517374e-05, "loss": 0.6896, "step": 1023 }, { "epoch": 0.19772156787024522, "grad_norm": 0.6371986865997314, "learning_rate": 9.884169884169885e-05, "loss": 0.6748, "step": 1024 }, { "epoch": 0.1979146553388685, "grad_norm": 0.7627459764480591, "learning_rate": 9.893822393822394e-05, "loss": 0.6874, "step": 1025 }, { "epoch": 0.1981077428074918, "grad_norm": 1.0652226209640503, "learning_rate": 9.903474903474904e-05, "loss": 0.7809, "step": 1026 }, { "epoch": 0.1983008302761151, "grad_norm": 2.3127284049987793, "learning_rate": 9.913127413127413e-05, "loss": 0.7043, "step": 1027 }, { "epoch": 0.19849391774473837, "grad_norm": 0.63377445936203, "learning_rate": 9.922779922779923e-05, "loss": 0.7457, "step": 1028 }, { "epoch": 0.19868700521336166, "grad_norm": 0.531995952129364, "learning_rate": 9.932432432432433e-05, "loss": 0.7455, "step": 1029 }, { "epoch": 0.19888009268198495, "grad_norm": 0.5376825332641602, "learning_rate": 9.942084942084942e-05, "loss": 0.7429, "step": 1030 }, { "epoch": 0.19907318015060824, "grad_norm": 0.6501802802085876, "learning_rate": 9.951737451737451e-05, "loss": 0.671, "step": 1031 }, { "epoch": 0.19926626761923152, "grad_norm": 0.6446878910064697, "learning_rate": 9.961389961389962e-05, "loss": 0.7684, "step": 1032 }, { "epoch": 0.1994593550878548, "grad_norm": 0.6023703813552856, "learning_rate": 9.971042471042471e-05, "loss": 0.6661, "step": 1033 }, { "epoch": 0.19965244255647807, "grad_norm": 0.8434319496154785, "learning_rate": 9.98069498069498e-05, "loss": 0.7889, "step": 1034 }, { "epoch": 0.19984553002510136, "grad_norm": 0.6809189915657043, "learning_rate": 9.99034749034749e-05, "loss": 0.6583, "step": 1035 }, { "epoch": 0.20003861749372465, "grad_norm": 0.47468453645706177, "learning_rate": 0.0001, "loss": 0.7085, "step": 1036 }, { "epoch": 0.20023170496234793, "grad_norm": 0.6617467403411865, "learning_rate": 9.999999716063286e-05, "loss": 0.671, "step": 1037 }, { "epoch": 0.20042479243097122, "grad_norm": 0.6420814394950867, "learning_rate": 9.999998864253175e-05, "loss": 0.7505, "step": 1038 }, { "epoch": 0.2006178798995945, "grad_norm": 0.7835135459899902, "learning_rate": 9.999997444569762e-05, "loss": 0.7434, "step": 1039 }, { "epoch": 0.2008109673682178, "grad_norm": 0.7671399116516113, "learning_rate": 9.999995457013213e-05, "loss": 0.7064, "step": 1040 }, { "epoch": 0.20100405483684108, "grad_norm": 0.62278813123703, "learning_rate": 9.99999290158375e-05, "loss": 0.6798, "step": 1041 }, { "epoch": 0.20119714230546437, "grad_norm": 1.892646074295044, "learning_rate": 9.999989778281664e-05, "loss": 0.7272, "step": 1042 }, { "epoch": 0.20139022977408766, "grad_norm": 0.5923601984977722, "learning_rate": 9.999986087107309e-05, "loss": 0.6848, "step": 1043 }, { "epoch": 0.20158331724271095, "grad_norm": 1.6523208618164062, "learning_rate": 9.999981828061105e-05, "loss": 0.6981, "step": 1044 }, { "epoch": 0.20177640471133423, "grad_norm": 0.7431443929672241, "learning_rate": 9.999977001143537e-05, "loss": 0.7844, "step": 1045 }, { "epoch": 0.20196949217995752, "grad_norm": 0.6774401664733887, "learning_rate": 9.999971606355151e-05, "loss": 0.7034, "step": 1046 }, { "epoch": 0.2021625796485808, "grad_norm": 0.6236673593521118, "learning_rate": 9.999965643696562e-05, "loss": 0.7176, "step": 1047 }, { "epoch": 0.2023556671172041, "grad_norm": 0.47953540086746216, "learning_rate": 9.999959113168444e-05, "loss": 0.7278, "step": 1048 }, { "epoch": 0.20254875458582738, "grad_norm": 0.5871118307113647, "learning_rate": 9.999952014771543e-05, "loss": 0.6979, "step": 1049 }, { "epoch": 0.20274184205445067, "grad_norm": 1.0550580024719238, "learning_rate": 9.999944348506661e-05, "loss": 0.6662, "step": 1050 }, { "epoch": 0.20293492952307396, "grad_norm": 0.9053893685340881, "learning_rate": 9.99993611437467e-05, "loss": 0.7119, "step": 1051 }, { "epoch": 0.20312801699169725, "grad_norm": 1.1754294633865356, "learning_rate": 9.999927312376508e-05, "loss": 0.7082, "step": 1052 }, { "epoch": 0.20332110446032053, "grad_norm": 0.6831271648406982, "learning_rate": 9.999917942513172e-05, "loss": 0.6882, "step": 1053 }, { "epoch": 0.20351419192894382, "grad_norm": 1.017164707183838, "learning_rate": 9.999908004785725e-05, "loss": 0.7213, "step": 1054 }, { "epoch": 0.2037072793975671, "grad_norm": 0.7240299582481384, "learning_rate": 9.999897499195299e-05, "loss": 0.7055, "step": 1055 }, { "epoch": 0.2039003668661904, "grad_norm": 0.5810226798057556, "learning_rate": 9.999886425743084e-05, "loss": 0.6692, "step": 1056 }, { "epoch": 0.20409345433481368, "grad_norm": 0.8812791705131531, "learning_rate": 9.99987478443034e-05, "loss": 0.675, "step": 1057 }, { "epoch": 0.20428654180343694, "grad_norm": 1.568663239479065, "learning_rate": 9.999862575258388e-05, "loss": 0.7357, "step": 1058 }, { "epoch": 0.20447962927206023, "grad_norm": 1.5504759550094604, "learning_rate": 9.999849798228614e-05, "loss": 0.7269, "step": 1059 }, { "epoch": 0.20467271674068352, "grad_norm": 0.6448707580566406, "learning_rate": 9.99983645334247e-05, "loss": 0.7154, "step": 1060 }, { "epoch": 0.2048658042093068, "grad_norm": 0.6165869235992432, "learning_rate": 9.999822540601474e-05, "loss": 0.7114, "step": 1061 }, { "epoch": 0.2050588916779301, "grad_norm": 0.6299775242805481, "learning_rate": 9.999808060007201e-05, "loss": 0.6537, "step": 1062 }, { "epoch": 0.20525197914655338, "grad_norm": 0.9465042948722839, "learning_rate": 9.9997930115613e-05, "loss": 0.6262, "step": 1063 }, { "epoch": 0.20544506661517667, "grad_norm": 0.5364672541618347, "learning_rate": 9.99977739526548e-05, "loss": 0.6644, "step": 1064 }, { "epoch": 0.20563815408379996, "grad_norm": 0.5597127079963684, "learning_rate": 9.99976121112151e-05, "loss": 0.6882, "step": 1065 }, { "epoch": 0.20583124155242324, "grad_norm": 0.6260690689086914, "learning_rate": 9.999744459131233e-05, "loss": 0.7061, "step": 1066 }, { "epoch": 0.20602432902104653, "grad_norm": 0.4986824095249176, "learning_rate": 9.999727139296551e-05, "loss": 0.6571, "step": 1067 }, { "epoch": 0.20621741648966982, "grad_norm": 0.7391287684440613, "learning_rate": 9.999709251619428e-05, "loss": 0.682, "step": 1068 }, { "epoch": 0.2064105039582931, "grad_norm": 0.6883731484413147, "learning_rate": 9.999690796101899e-05, "loss": 0.6051, "step": 1069 }, { "epoch": 0.2066035914269164, "grad_norm": 0.5453018546104431, "learning_rate": 9.999671772746058e-05, "loss": 0.7084, "step": 1070 }, { "epoch": 0.20679667889553968, "grad_norm": 0.6600997447967529, "learning_rate": 9.999652181554066e-05, "loss": 0.7901, "step": 1071 }, { "epoch": 0.20698976636416297, "grad_norm": 0.5246230959892273, "learning_rate": 9.999632022528148e-05, "loss": 0.7132, "step": 1072 }, { "epoch": 0.20718285383278626, "grad_norm": 1.00343918800354, "learning_rate": 9.999611295670594e-05, "loss": 0.7355, "step": 1073 }, { "epoch": 0.20737594130140954, "grad_norm": 0.5637930035591125, "learning_rate": 9.999590000983758e-05, "loss": 0.7078, "step": 1074 }, { "epoch": 0.20756902877003283, "grad_norm": 0.49753305315971375, "learning_rate": 9.99956813847006e-05, "loss": 0.6668, "step": 1075 }, { "epoch": 0.20776211623865612, "grad_norm": 0.6152164340019226, "learning_rate": 9.999545708131978e-05, "loss": 0.6667, "step": 1076 }, { "epoch": 0.2079552037072794, "grad_norm": 0.8412054777145386, "learning_rate": 9.999522709972067e-05, "loss": 0.639, "step": 1077 }, { "epoch": 0.2081482911759027, "grad_norm": 0.5355746746063232, "learning_rate": 9.999499143992933e-05, "loss": 0.7033, "step": 1078 }, { "epoch": 0.20834137864452598, "grad_norm": 0.7270849347114563, "learning_rate": 9.999475010197255e-05, "loss": 0.6458, "step": 1079 }, { "epoch": 0.20853446611314927, "grad_norm": 0.5444059371948242, "learning_rate": 9.999450308587772e-05, "loss": 0.6536, "step": 1080 }, { "epoch": 0.20872755358177253, "grad_norm": 0.49031567573547363, "learning_rate": 9.999425039167292e-05, "loss": 0.7326, "step": 1081 }, { "epoch": 0.20892064105039582, "grad_norm": 0.5984886884689331, "learning_rate": 9.999399201938685e-05, "loss": 0.6886, "step": 1082 }, { "epoch": 0.2091137285190191, "grad_norm": 0.6640185713768005, "learning_rate": 9.999372796904883e-05, "loss": 0.6623, "step": 1083 }, { "epoch": 0.2093068159876424, "grad_norm": 0.5586106777191162, "learning_rate": 9.999345824068886e-05, "loss": 0.667, "step": 1084 }, { "epoch": 0.20949990345626568, "grad_norm": 0.5642006397247314, "learning_rate": 9.999318283433758e-05, "loss": 0.7198, "step": 1085 }, { "epoch": 0.20969299092488897, "grad_norm": 0.7414863705635071, "learning_rate": 9.99929017500263e-05, "loss": 0.716, "step": 1086 }, { "epoch": 0.20988607839351225, "grad_norm": 0.5724707245826721, "learning_rate": 9.999261498778687e-05, "loss": 0.6911, "step": 1087 }, { "epoch": 0.21007916586213554, "grad_norm": 0.5451279878616333, "learning_rate": 9.99923225476519e-05, "loss": 0.7524, "step": 1088 }, { "epoch": 0.21027225333075883, "grad_norm": 0.5798298716545105, "learning_rate": 9.999202442965463e-05, "loss": 0.6647, "step": 1089 }, { "epoch": 0.21046534079938212, "grad_norm": 0.7412410378456116, "learning_rate": 9.999172063382887e-05, "loss": 0.615, "step": 1090 }, { "epoch": 0.2106584282680054, "grad_norm": 0.6126954555511475, "learning_rate": 9.999141116020916e-05, "loss": 0.6845, "step": 1091 }, { "epoch": 0.2108515157366287, "grad_norm": 0.527917206287384, "learning_rate": 9.999109600883062e-05, "loss": 0.739, "step": 1092 }, { "epoch": 0.21104460320525198, "grad_norm": 0.438515841960907, "learning_rate": 9.999077517972905e-05, "loss": 0.6957, "step": 1093 }, { "epoch": 0.21123769067387527, "grad_norm": 0.8389379978179932, "learning_rate": 9.999044867294092e-05, "loss": 0.6606, "step": 1094 }, { "epoch": 0.21143077814249855, "grad_norm": 0.665282130241394, "learning_rate": 9.999011648850329e-05, "loss": 0.6451, "step": 1095 }, { "epoch": 0.21162386561112184, "grad_norm": 0.5958911776542664, "learning_rate": 9.998977862645386e-05, "loss": 0.6983, "step": 1096 }, { "epoch": 0.21181695307974513, "grad_norm": 0.5577141046524048, "learning_rate": 9.998943508683105e-05, "loss": 0.6874, "step": 1097 }, { "epoch": 0.21201004054836842, "grad_norm": 1.6406807899475098, "learning_rate": 9.998908586967385e-05, "loss": 0.6995, "step": 1098 }, { "epoch": 0.2122031280169917, "grad_norm": 0.8275139331817627, "learning_rate": 9.998873097502193e-05, "loss": 0.7314, "step": 1099 }, { "epoch": 0.212396215485615, "grad_norm": 0.531247615814209, "learning_rate": 9.998837040291559e-05, "loss": 0.6559, "step": 1100 }, { "epoch": 0.21258930295423828, "grad_norm": 0.7473740577697754, "learning_rate": 9.998800415339579e-05, "loss": 0.7648, "step": 1101 }, { "epoch": 0.21278239042286157, "grad_norm": 0.5701355934143066, "learning_rate": 9.998763222650412e-05, "loss": 0.7234, "step": 1102 }, { "epoch": 0.21297547789148485, "grad_norm": 0.5198802947998047, "learning_rate": 9.998725462228282e-05, "loss": 0.7358, "step": 1103 }, { "epoch": 0.21316856536010814, "grad_norm": 0.4208330810070038, "learning_rate": 9.99868713407748e-05, "loss": 0.6822, "step": 1104 }, { "epoch": 0.2133616528287314, "grad_norm": 0.5960032939910889, "learning_rate": 9.998648238202355e-05, "loss": 0.633, "step": 1105 }, { "epoch": 0.2135547402973547, "grad_norm": 0.5772632360458374, "learning_rate": 9.99860877460733e-05, "loss": 0.669, "step": 1106 }, { "epoch": 0.21374782776597798, "grad_norm": 0.5728495121002197, "learning_rate": 9.998568743296881e-05, "loss": 0.6699, "step": 1107 }, { "epoch": 0.21394091523460126, "grad_norm": 0.9353781342506409, "learning_rate": 9.998528144275559e-05, "loss": 0.6722, "step": 1108 }, { "epoch": 0.21413400270322455, "grad_norm": 0.6441842317581177, "learning_rate": 9.998486977547972e-05, "loss": 0.6458, "step": 1109 }, { "epoch": 0.21432709017184784, "grad_norm": 0.5807494521141052, "learning_rate": 9.9984452431188e-05, "loss": 0.7391, "step": 1110 }, { "epoch": 0.21452017764047113, "grad_norm": 0.4626728892326355, "learning_rate": 9.998402940992776e-05, "loss": 0.7041, "step": 1111 }, { "epoch": 0.21471326510909441, "grad_norm": 0.6925112009048462, "learning_rate": 9.99836007117471e-05, "loss": 0.674, "step": 1112 }, { "epoch": 0.2149063525777177, "grad_norm": 0.6443095207214355, "learning_rate": 9.99831663366947e-05, "loss": 0.6633, "step": 1113 }, { "epoch": 0.215099440046341, "grad_norm": 0.5722222924232483, "learning_rate": 9.998272628481989e-05, "loss": 0.7235, "step": 1114 }, { "epoch": 0.21529252751496428, "grad_norm": 0.685859739780426, "learning_rate": 9.998228055617263e-05, "loss": 0.6724, "step": 1115 }, { "epoch": 0.21548561498358756, "grad_norm": 0.6683393120765686, "learning_rate": 9.998182915080358e-05, "loss": 0.6931, "step": 1116 }, { "epoch": 0.21567870245221085, "grad_norm": 0.5020275712013245, "learning_rate": 9.998137206876397e-05, "loss": 0.6984, "step": 1117 }, { "epoch": 0.21587178992083414, "grad_norm": 0.4372502267360687, "learning_rate": 9.998090931010573e-05, "loss": 0.6453, "step": 1118 }, { "epoch": 0.21606487738945743, "grad_norm": 0.42346787452697754, "learning_rate": 9.998044087488143e-05, "loss": 0.7283, "step": 1119 }, { "epoch": 0.21625796485808071, "grad_norm": 0.5169185400009155, "learning_rate": 9.997996676314426e-05, "loss": 0.729, "step": 1120 }, { "epoch": 0.216451052326704, "grad_norm": 0.686890721321106, "learning_rate": 9.997948697494805e-05, "loss": 0.7183, "step": 1121 }, { "epoch": 0.2166441397953273, "grad_norm": 0.7224063873291016, "learning_rate": 9.997900151034733e-05, "loss": 0.6719, "step": 1122 }, { "epoch": 0.21683722726395058, "grad_norm": 0.5496407747268677, "learning_rate": 9.997851036939721e-05, "loss": 0.6562, "step": 1123 }, { "epoch": 0.21703031473257386, "grad_norm": 0.45328837633132935, "learning_rate": 9.997801355215348e-05, "loss": 0.6849, "step": 1124 }, { "epoch": 0.21722340220119715, "grad_norm": 0.5998146533966064, "learning_rate": 9.997751105867256e-05, "loss": 0.7344, "step": 1125 }, { "epoch": 0.21741648966982044, "grad_norm": 0.5185806155204773, "learning_rate": 9.997700288901153e-05, "loss": 0.6881, "step": 1126 }, { "epoch": 0.21760957713844373, "grad_norm": 0.5387551784515381, "learning_rate": 9.99764890432281e-05, "loss": 0.7214, "step": 1127 }, { "epoch": 0.21780266460706701, "grad_norm": 0.638056755065918, "learning_rate": 9.997596952138062e-05, "loss": 0.7016, "step": 1128 }, { "epoch": 0.21799575207569027, "grad_norm": 0.4894709587097168, "learning_rate": 9.99754443235281e-05, "loss": 0.7065, "step": 1129 }, { "epoch": 0.21818883954431356, "grad_norm": 0.5061357617378235, "learning_rate": 9.99749134497302e-05, "loss": 0.6828, "step": 1130 }, { "epoch": 0.21838192701293685, "grad_norm": 0.6885358095169067, "learning_rate": 9.997437690004721e-05, "loss": 0.6942, "step": 1131 }, { "epoch": 0.21857501448156014, "grad_norm": 0.5364677906036377, "learning_rate": 9.997383467454004e-05, "loss": 0.7568, "step": 1132 }, { "epoch": 0.21876810195018342, "grad_norm": 0.5091676115989685, "learning_rate": 9.997328677327033e-05, "loss": 0.7729, "step": 1133 }, { "epoch": 0.2189611894188067, "grad_norm": 0.8811430931091309, "learning_rate": 9.997273319630026e-05, "loss": 0.7237, "step": 1134 }, { "epoch": 0.21915427688743, "grad_norm": 0.7509890794754028, "learning_rate": 9.997217394369271e-05, "loss": 0.6355, "step": 1135 }, { "epoch": 0.2193473643560533, "grad_norm": 0.7870464324951172, "learning_rate": 9.997160901551122e-05, "loss": 0.7179, "step": 1136 }, { "epoch": 0.21954045182467657, "grad_norm": 1.054459571838379, "learning_rate": 9.997103841181994e-05, "loss": 0.6973, "step": 1137 }, { "epoch": 0.21973353929329986, "grad_norm": 2.679799795150757, "learning_rate": 9.997046213268366e-05, "loss": 0.7074, "step": 1138 }, { "epoch": 0.21992662676192315, "grad_norm": 0.7892328500747681, "learning_rate": 9.996988017816785e-05, "loss": 0.7142, "step": 1139 }, { "epoch": 0.22011971423054644, "grad_norm": 0.540047287940979, "learning_rate": 9.99692925483386e-05, "loss": 0.6868, "step": 1140 }, { "epoch": 0.22031280169916972, "grad_norm": 0.6472477912902832, "learning_rate": 9.996869924326265e-05, "loss": 0.6748, "step": 1141 }, { "epoch": 0.220505889167793, "grad_norm": 0.8509537577629089, "learning_rate": 9.996810026300737e-05, "loss": 0.6531, "step": 1142 }, { "epoch": 0.2206989766364163, "grad_norm": 0.5590402483940125, "learning_rate": 9.996749560764081e-05, "loss": 0.6643, "step": 1143 }, { "epoch": 0.2208920641050396, "grad_norm": 0.5545117259025574, "learning_rate": 9.996688527723163e-05, "loss": 0.678, "step": 1144 }, { "epoch": 0.22108515157366287, "grad_norm": 1.5592830181121826, "learning_rate": 9.996626927184916e-05, "loss": 0.6656, "step": 1145 }, { "epoch": 0.22127823904228616, "grad_norm": 0.7121810913085938, "learning_rate": 9.996564759156335e-05, "loss": 0.6843, "step": 1146 }, { "epoch": 0.22147132651090945, "grad_norm": 0.4861655533313751, "learning_rate": 9.996502023644483e-05, "loss": 0.7168, "step": 1147 }, { "epoch": 0.22166441397953274, "grad_norm": 0.5164228677749634, "learning_rate": 9.996438720656482e-05, "loss": 0.6957, "step": 1148 }, { "epoch": 0.22185750144815602, "grad_norm": 0.5095804333686829, "learning_rate": 9.996374850199524e-05, "loss": 0.6833, "step": 1149 }, { "epoch": 0.2220505889167793, "grad_norm": 1.2987717390060425, "learning_rate": 9.99631041228086e-05, "loss": 0.7508, "step": 1150 }, { "epoch": 0.2222436763854026, "grad_norm": 0.7664031982421875, "learning_rate": 9.996245406907814e-05, "loss": 0.6705, "step": 1151 }, { "epoch": 0.2224367638540259, "grad_norm": 0.7276837825775146, "learning_rate": 9.996179834087762e-05, "loss": 0.6571, "step": 1152 }, { "epoch": 0.22262985132264915, "grad_norm": 0.5979049801826477, "learning_rate": 9.996113693828157e-05, "loss": 0.7309, "step": 1153 }, { "epoch": 0.22282293879127243, "grad_norm": 0.6280199885368347, "learning_rate": 9.996046986136509e-05, "loss": 0.6985, "step": 1154 }, { "epoch": 0.22301602625989572, "grad_norm": 0.5978015661239624, "learning_rate": 9.995979711020396e-05, "loss": 0.6653, "step": 1155 }, { "epoch": 0.223209113728519, "grad_norm": 0.479145884513855, "learning_rate": 9.995911868487455e-05, "loss": 0.6868, "step": 1156 }, { "epoch": 0.2234022011971423, "grad_norm": 1.3773549795150757, "learning_rate": 9.995843458545393e-05, "loss": 0.7485, "step": 1157 }, { "epoch": 0.22359528866576558, "grad_norm": 0.5039312243461609, "learning_rate": 9.995774481201979e-05, "loss": 0.6901, "step": 1158 }, { "epoch": 0.22378837613438887, "grad_norm": 0.6154244542121887, "learning_rate": 9.995704936465049e-05, "loss": 0.7125, "step": 1159 }, { "epoch": 0.22398146360301216, "grad_norm": 1.6100778579711914, "learning_rate": 9.995634824342501e-05, "loss": 0.719, "step": 1160 }, { "epoch": 0.22417455107163545, "grad_norm": 0.7784770727157593, "learning_rate": 9.995564144842297e-05, "loss": 0.6348, "step": 1161 }, { "epoch": 0.22436763854025873, "grad_norm": 0.7850674390792847, "learning_rate": 9.995492897972465e-05, "loss": 0.7118, "step": 1162 }, { "epoch": 0.22456072600888202, "grad_norm": 0.5933948755264282, "learning_rate": 9.995421083741095e-05, "loss": 0.7437, "step": 1163 }, { "epoch": 0.2247538134775053, "grad_norm": 0.59609055519104, "learning_rate": 9.995348702156347e-05, "loss": 0.6475, "step": 1164 }, { "epoch": 0.2249469009461286, "grad_norm": 0.5407630801200867, "learning_rate": 9.995275753226438e-05, "loss": 0.7077, "step": 1165 }, { "epoch": 0.22513998841475188, "grad_norm": 0.5638548731803894, "learning_rate": 9.995202236959655e-05, "loss": 0.6456, "step": 1166 }, { "epoch": 0.22533307588337517, "grad_norm": 0.6343022584915161, "learning_rate": 9.995128153364349e-05, "loss": 0.7687, "step": 1167 }, { "epoch": 0.22552616335199846, "grad_norm": 0.6060675978660583, "learning_rate": 9.995053502448932e-05, "loss": 0.7372, "step": 1168 }, { "epoch": 0.22571925082062175, "grad_norm": 0.7851900458335876, "learning_rate": 9.994978284221881e-05, "loss": 0.708, "step": 1169 }, { "epoch": 0.22591233828924503, "grad_norm": 0.5258160829544067, "learning_rate": 9.994902498691743e-05, "loss": 0.6837, "step": 1170 }, { "epoch": 0.22610542575786832, "grad_norm": 0.6399975419044495, "learning_rate": 9.994826145867121e-05, "loss": 0.6466, "step": 1171 }, { "epoch": 0.2262985132264916, "grad_norm": 0.5536001920700073, "learning_rate": 9.99474922575669e-05, "loss": 0.7183, "step": 1172 }, { "epoch": 0.2264916006951149, "grad_norm": 0.5447351336479187, "learning_rate": 9.994671738369183e-05, "loss": 0.7018, "step": 1173 }, { "epoch": 0.22668468816373818, "grad_norm": 0.5284375548362732, "learning_rate": 9.994593683713405e-05, "loss": 0.7359, "step": 1174 }, { "epoch": 0.22687777563236147, "grad_norm": 0.46539047360420227, "learning_rate": 9.994515061798218e-05, "loss": 0.696, "step": 1175 }, { "epoch": 0.22707086310098473, "grad_norm": 0.6390096545219421, "learning_rate": 9.994435872632553e-05, "loss": 0.7381, "step": 1176 }, { "epoch": 0.22726395056960802, "grad_norm": 0.582425057888031, "learning_rate": 9.994356116225401e-05, "loss": 0.7669, "step": 1177 }, { "epoch": 0.2274570380382313, "grad_norm": 0.5095418095588684, "learning_rate": 9.994275792585824e-05, "loss": 0.6627, "step": 1178 }, { "epoch": 0.2276501255068546, "grad_norm": 0.6614007353782654, "learning_rate": 9.994194901722941e-05, "loss": 0.7311, "step": 1179 }, { "epoch": 0.22784321297547788, "grad_norm": 0.6688441634178162, "learning_rate": 9.994113443645943e-05, "loss": 0.6979, "step": 1180 }, { "epoch": 0.22803630044410117, "grad_norm": 0.5209106802940369, "learning_rate": 9.994031418364078e-05, "loss": 0.7307, "step": 1181 }, { "epoch": 0.22822938791272446, "grad_norm": 0.8705430030822754, "learning_rate": 9.993948825886664e-05, "loss": 0.658, "step": 1182 }, { "epoch": 0.22842247538134774, "grad_norm": 0.46040862798690796, "learning_rate": 9.993865666223082e-05, "loss": 0.7168, "step": 1183 }, { "epoch": 0.22861556284997103, "grad_norm": 0.5629756450653076, "learning_rate": 9.993781939382776e-05, "loss": 0.7585, "step": 1184 }, { "epoch": 0.22880865031859432, "grad_norm": 0.6951826214790344, "learning_rate": 9.993697645375254e-05, "loss": 0.6811, "step": 1185 }, { "epoch": 0.2290017377872176, "grad_norm": 0.6121455430984497, "learning_rate": 9.993612784210092e-05, "loss": 0.6995, "step": 1186 }, { "epoch": 0.2291948252558409, "grad_norm": 0.47211116552352905, "learning_rate": 9.993527355896928e-05, "loss": 0.708, "step": 1187 }, { "epoch": 0.22938791272446418, "grad_norm": 0.6039971709251404, "learning_rate": 9.993441360445461e-05, "loss": 0.7413, "step": 1188 }, { "epoch": 0.22958100019308747, "grad_norm": 0.4253261983394623, "learning_rate": 9.993354797865462e-05, "loss": 0.7182, "step": 1189 }, { "epoch": 0.22977408766171076, "grad_norm": 0.5305140018463135, "learning_rate": 9.99326766816676e-05, "loss": 0.6727, "step": 1190 }, { "epoch": 0.22996717513033405, "grad_norm": 0.4861665368080139, "learning_rate": 9.993179971359252e-05, "loss": 0.7205, "step": 1191 }, { "epoch": 0.23016026259895733, "grad_norm": 1.4529622793197632, "learning_rate": 9.993091707452897e-05, "loss": 0.7436, "step": 1192 }, { "epoch": 0.23035335006758062, "grad_norm": 0.7247600555419922, "learning_rate": 9.99300287645772e-05, "loss": 0.6901, "step": 1193 }, { "epoch": 0.2305464375362039, "grad_norm": 0.8072078227996826, "learning_rate": 9.99291347838381e-05, "loss": 0.6431, "step": 1194 }, { "epoch": 0.2307395250048272, "grad_norm": 0.8437206149101257, "learning_rate": 9.992823513241321e-05, "loss": 0.7018, "step": 1195 }, { "epoch": 0.23093261247345048, "grad_norm": 0.8489112257957458, "learning_rate": 9.992732981040469e-05, "loss": 0.6898, "step": 1196 }, { "epoch": 0.23112569994207377, "grad_norm": 0.6669548153877258, "learning_rate": 9.992641881791539e-05, "loss": 0.6792, "step": 1197 }, { "epoch": 0.23131878741069706, "grad_norm": 0.4363873302936554, "learning_rate": 9.992550215504875e-05, "loss": 0.6723, "step": 1198 }, { "epoch": 0.23151187487932035, "grad_norm": 1.0966647863388062, "learning_rate": 9.992457982190888e-05, "loss": 0.7635, "step": 1199 }, { "epoch": 0.2317049623479436, "grad_norm": 0.3843541145324707, "learning_rate": 9.992365181860054e-05, "loss": 0.7235, "step": 1200 }, { "epoch": 0.2318980498165669, "grad_norm": 0.5866299867630005, "learning_rate": 9.992271814522915e-05, "loss": 0.704, "step": 1201 }, { "epoch": 0.23209113728519018, "grad_norm": 0.7000101804733276, "learning_rate": 9.992177880190072e-05, "loss": 0.6749, "step": 1202 }, { "epoch": 0.23228422475381347, "grad_norm": 0.5280899405479431, "learning_rate": 9.992083378872196e-05, "loss": 0.7492, "step": 1203 }, { "epoch": 0.23247731222243675, "grad_norm": 0.4495706260204315, "learning_rate": 9.991988310580019e-05, "loss": 0.7302, "step": 1204 }, { "epoch": 0.23267039969106004, "grad_norm": 0.42649585008621216, "learning_rate": 9.991892675324335e-05, "loss": 0.6927, "step": 1205 }, { "epoch": 0.23286348715968333, "grad_norm": 0.5173787474632263, "learning_rate": 9.991796473116011e-05, "loss": 0.7126, "step": 1206 }, { "epoch": 0.23305657462830662, "grad_norm": 0.6269359588623047, "learning_rate": 9.99169970396597e-05, "loss": 0.6958, "step": 1207 }, { "epoch": 0.2332496620969299, "grad_norm": 0.47083258628845215, "learning_rate": 9.991602367885205e-05, "loss": 0.6633, "step": 1208 }, { "epoch": 0.2334427495655532, "grad_norm": 0.4209361970424652, "learning_rate": 9.991504464884767e-05, "loss": 0.7415, "step": 1209 }, { "epoch": 0.23363583703417648, "grad_norm": 0.4902897775173187, "learning_rate": 9.99140599497578e-05, "loss": 0.6515, "step": 1210 }, { "epoch": 0.23382892450279977, "grad_norm": 0.5516731142997742, "learning_rate": 9.991306958169424e-05, "loss": 0.6447, "step": 1211 }, { "epoch": 0.23402201197142306, "grad_norm": 0.5259610414505005, "learning_rate": 9.991207354476948e-05, "loss": 0.6702, "step": 1212 }, { "epoch": 0.23421509944004634, "grad_norm": 0.42607396841049194, "learning_rate": 9.991107183909664e-05, "loss": 0.6471, "step": 1213 }, { "epoch": 0.23440818690866963, "grad_norm": 0.3712533414363861, "learning_rate": 9.991006446478951e-05, "loss": 0.654, "step": 1214 }, { "epoch": 0.23460127437729292, "grad_norm": 0.8497632741928101, "learning_rate": 9.990905142196251e-05, "loss": 0.6699, "step": 1215 }, { "epoch": 0.2347943618459162, "grad_norm": 0.4819409251213074, "learning_rate": 9.990803271073066e-05, "loss": 0.7031, "step": 1216 }, { "epoch": 0.2349874493145395, "grad_norm": 0.5141865611076355, "learning_rate": 9.990700833120967e-05, "loss": 0.7439, "step": 1217 }, { "epoch": 0.23518053678316278, "grad_norm": 0.43491876125335693, "learning_rate": 9.99059782835159e-05, "loss": 0.6747, "step": 1218 }, { "epoch": 0.23537362425178607, "grad_norm": 0.4617178738117218, "learning_rate": 9.990494256776631e-05, "loss": 0.7063, "step": 1219 }, { "epoch": 0.23556671172040936, "grad_norm": 0.5098789930343628, "learning_rate": 9.990390118407856e-05, "loss": 0.6706, "step": 1220 }, { "epoch": 0.23575979918903264, "grad_norm": 0.6668133735656738, "learning_rate": 9.99028541325709e-05, "loss": 0.6823, "step": 1221 }, { "epoch": 0.23595288665765593, "grad_norm": 0.5415806770324707, "learning_rate": 9.990180141336227e-05, "loss": 0.6208, "step": 1222 }, { "epoch": 0.23614597412627922, "grad_norm": 0.4333534836769104, "learning_rate": 9.990074302657222e-05, "loss": 0.6987, "step": 1223 }, { "epoch": 0.23633906159490248, "grad_norm": 0.5628812909126282, "learning_rate": 9.989967897232095e-05, "loss": 0.678, "step": 1224 }, { "epoch": 0.23653214906352577, "grad_norm": 0.6253170967102051, "learning_rate": 9.989860925072933e-05, "loss": 0.6924, "step": 1225 }, { "epoch": 0.23672523653214905, "grad_norm": 0.6114835143089294, "learning_rate": 9.989753386191885e-05, "loss": 0.7177, "step": 1226 }, { "epoch": 0.23691832400077234, "grad_norm": 0.4187268912792206, "learning_rate": 9.989645280601163e-05, "loss": 0.6117, "step": 1227 }, { "epoch": 0.23711141146939563, "grad_norm": 0.6926881670951843, "learning_rate": 9.989536608313045e-05, "loss": 0.6414, "step": 1228 }, { "epoch": 0.23730449893801892, "grad_norm": 1.933449625968933, "learning_rate": 9.989427369339874e-05, "loss": 0.7321, "step": 1229 }, { "epoch": 0.2374975864066422, "grad_norm": 0.6588943600654602, "learning_rate": 9.989317563694059e-05, "loss": 0.7403, "step": 1230 }, { "epoch": 0.2376906738752655, "grad_norm": 0.6404739618301392, "learning_rate": 9.989207191388069e-05, "loss": 0.7213, "step": 1231 }, { "epoch": 0.23788376134388878, "grad_norm": 0.4399758577346802, "learning_rate": 9.98909625243444e-05, "loss": 0.684, "step": 1232 }, { "epoch": 0.23807684881251207, "grad_norm": 0.4618995189666748, "learning_rate": 9.98898474684577e-05, "loss": 0.7731, "step": 1233 }, { "epoch": 0.23826993628113535, "grad_norm": 1.527623176574707, "learning_rate": 9.988872674634725e-05, "loss": 0.6561, "step": 1234 }, { "epoch": 0.23846302374975864, "grad_norm": 0.5149727463722229, "learning_rate": 9.988760035814036e-05, "loss": 0.7171, "step": 1235 }, { "epoch": 0.23865611121838193, "grad_norm": 0.6762256622314453, "learning_rate": 9.988646830396491e-05, "loss": 0.732, "step": 1236 }, { "epoch": 0.23884919868700522, "grad_norm": 0.5505908131599426, "learning_rate": 9.98853305839495e-05, "loss": 0.7513, "step": 1237 }, { "epoch": 0.2390422861556285, "grad_norm": 0.443204402923584, "learning_rate": 9.988418719822335e-05, "loss": 0.7115, "step": 1238 }, { "epoch": 0.2392353736242518, "grad_norm": 0.782622754573822, "learning_rate": 9.98830381469163e-05, "loss": 0.7068, "step": 1239 }, { "epoch": 0.23942846109287508, "grad_norm": 0.45916011929512024, "learning_rate": 9.988188343015886e-05, "loss": 0.6519, "step": 1240 }, { "epoch": 0.23962154856149837, "grad_norm": 0.5387373566627502, "learning_rate": 9.988072304808221e-05, "loss": 0.6881, "step": 1241 }, { "epoch": 0.23981463603012165, "grad_norm": 0.5532600283622742, "learning_rate": 9.987955700081808e-05, "loss": 0.7009, "step": 1242 }, { "epoch": 0.24000772349874494, "grad_norm": 0.5705749988555908, "learning_rate": 9.987838528849895e-05, "loss": 0.6745, "step": 1243 }, { "epoch": 0.24020081096736823, "grad_norm": 0.4860996603965759, "learning_rate": 9.987720791125788e-05, "loss": 0.6005, "step": 1244 }, { "epoch": 0.24039389843599152, "grad_norm": 0.8031142354011536, "learning_rate": 9.987602486922859e-05, "loss": 0.6634, "step": 1245 }, { "epoch": 0.2405869859046148, "grad_norm": 0.5706808567047119, "learning_rate": 9.987483616254545e-05, "loss": 0.629, "step": 1246 }, { "epoch": 0.2407800733732381, "grad_norm": 0.611445963382721, "learning_rate": 9.987364179134347e-05, "loss": 0.6346, "step": 1247 }, { "epoch": 0.24097316084186135, "grad_norm": 1.0055334568023682, "learning_rate": 9.987244175575827e-05, "loss": 0.7954, "step": 1248 }, { "epoch": 0.24116624831048464, "grad_norm": 0.5507382154464722, "learning_rate": 9.987123605592618e-05, "loss": 0.784, "step": 1249 }, { "epoch": 0.24135933577910793, "grad_norm": 0.6595774292945862, "learning_rate": 9.987002469198413e-05, "loss": 0.6505, "step": 1250 }, { "epoch": 0.2415524232477312, "grad_norm": 0.45026591420173645, "learning_rate": 9.986880766406968e-05, "loss": 0.7409, "step": 1251 }, { "epoch": 0.2417455107163545, "grad_norm": 0.5407450199127197, "learning_rate": 9.986758497232108e-05, "loss": 0.6979, "step": 1252 }, { "epoch": 0.2419385981849778, "grad_norm": 1.8455077409744263, "learning_rate": 9.986635661687718e-05, "loss": 0.7394, "step": 1253 }, { "epoch": 0.24213168565360108, "grad_norm": 0.5805695056915283, "learning_rate": 9.986512259787751e-05, "loss": 0.6611, "step": 1254 }, { "epoch": 0.24232477312222436, "grad_norm": 0.5156840682029724, "learning_rate": 9.986388291546218e-05, "loss": 0.696, "step": 1255 }, { "epoch": 0.24251786059084765, "grad_norm": 0.7377241253852844, "learning_rate": 9.986263756977203e-05, "loss": 0.6938, "step": 1256 }, { "epoch": 0.24271094805947094, "grad_norm": 0.8500128984451294, "learning_rate": 9.986138656094848e-05, "loss": 0.6978, "step": 1257 }, { "epoch": 0.24290403552809423, "grad_norm": 0.49009567499160767, "learning_rate": 9.986012988913362e-05, "loss": 0.7356, "step": 1258 }, { "epoch": 0.2430971229967175, "grad_norm": 0.85467129945755, "learning_rate": 9.985886755447018e-05, "loss": 0.6716, "step": 1259 }, { "epoch": 0.2432902104653408, "grad_norm": 0.5452736020088196, "learning_rate": 9.98575995571015e-05, "loss": 0.6701, "step": 1260 }, { "epoch": 0.2434832979339641, "grad_norm": 0.5141881704330444, "learning_rate": 9.985632589717165e-05, "loss": 0.6659, "step": 1261 }, { "epoch": 0.24367638540258738, "grad_norm": 0.6659857034683228, "learning_rate": 9.985504657482521e-05, "loss": 0.6751, "step": 1262 }, { "epoch": 0.24386947287121066, "grad_norm": 0.49388331174850464, "learning_rate": 9.985376159020755e-05, "loss": 0.6859, "step": 1263 }, { "epoch": 0.24406256033983395, "grad_norm": 0.6730266213417053, "learning_rate": 9.985247094346457e-05, "loss": 0.6853, "step": 1264 }, { "epoch": 0.24425564780845724, "grad_norm": 0.5321809649467468, "learning_rate": 9.985117463474287e-05, "loss": 0.6235, "step": 1265 }, { "epoch": 0.24444873527708053, "grad_norm": 0.5571835041046143, "learning_rate": 9.984987266418966e-05, "loss": 0.6676, "step": 1266 }, { "epoch": 0.2446418227457038, "grad_norm": 0.5247700810432434, "learning_rate": 9.984856503195284e-05, "loss": 0.7321, "step": 1267 }, { "epoch": 0.2448349102143271, "grad_norm": 0.5205221772193909, "learning_rate": 9.984725173818089e-05, "loss": 0.6774, "step": 1268 }, { "epoch": 0.2450279976829504, "grad_norm": 0.746692955493927, "learning_rate": 9.984593278302299e-05, "loss": 0.7124, "step": 1269 }, { "epoch": 0.24522108515157368, "grad_norm": 5.928170204162598, "learning_rate": 9.984460816662896e-05, "loss": 0.7174, "step": 1270 }, { "epoch": 0.24541417262019694, "grad_norm": 0.798840343952179, "learning_rate": 9.984327788914918e-05, "loss": 0.7778, "step": 1271 }, { "epoch": 0.24560726008882022, "grad_norm": 0.5952019095420837, "learning_rate": 9.98419419507348e-05, "loss": 0.705, "step": 1272 }, { "epoch": 0.2458003475574435, "grad_norm": 0.6628732681274414, "learning_rate": 9.98406003515375e-05, "loss": 0.6663, "step": 1273 }, { "epoch": 0.2459934350260668, "grad_norm": 0.5162720680236816, "learning_rate": 9.983925309170969e-05, "loss": 0.6902, "step": 1274 }, { "epoch": 0.24618652249469009, "grad_norm": 0.5153845548629761, "learning_rate": 9.983790017140438e-05, "loss": 0.7436, "step": 1275 }, { "epoch": 0.24637960996331337, "grad_norm": 0.4787285327911377, "learning_rate": 9.98365415907752e-05, "loss": 0.7248, "step": 1276 }, { "epoch": 0.24657269743193666, "grad_norm": 0.4701177775859833, "learning_rate": 9.983517734997647e-05, "loss": 0.6996, "step": 1277 }, { "epoch": 0.24676578490055995, "grad_norm": 0.5480031371116638, "learning_rate": 9.983380744916313e-05, "loss": 0.618, "step": 1278 }, { "epoch": 0.24695887236918324, "grad_norm": 0.5663642883300781, "learning_rate": 9.983243188849078e-05, "loss": 0.6903, "step": 1279 }, { "epoch": 0.24715195983780652, "grad_norm": 0.6805466413497925, "learning_rate": 9.983105066811562e-05, "loss": 0.6081, "step": 1280 }, { "epoch": 0.2473450473064298, "grad_norm": 0.9470702409744263, "learning_rate": 9.982966378819455e-05, "loss": 0.7191, "step": 1281 }, { "epoch": 0.2475381347750531, "grad_norm": 0.47824451327323914, "learning_rate": 9.982827124888505e-05, "loss": 0.6201, "step": 1282 }, { "epoch": 0.24773122224367639, "grad_norm": 0.47851428389549255, "learning_rate": 9.98268730503453e-05, "loss": 0.7743, "step": 1283 }, { "epoch": 0.24792430971229967, "grad_norm": 0.4650160074234009, "learning_rate": 9.982546919273412e-05, "loss": 0.6165, "step": 1284 }, { "epoch": 0.24811739718092296, "grad_norm": 0.46949684619903564, "learning_rate": 9.982405967621091e-05, "loss": 0.6935, "step": 1285 }, { "epoch": 0.24831048464954625, "grad_norm": 0.5536295175552368, "learning_rate": 9.982264450093578e-05, "loss": 0.6677, "step": 1286 }, { "epoch": 0.24850357211816954, "grad_norm": 0.7029494047164917, "learning_rate": 9.982122366706945e-05, "loss": 0.6496, "step": 1287 }, { "epoch": 0.24869665958679282, "grad_norm": 0.8037139773368835, "learning_rate": 9.981979717477331e-05, "loss": 0.6487, "step": 1288 }, { "epoch": 0.2488897470554161, "grad_norm": 0.7154842019081116, "learning_rate": 9.981836502420936e-05, "loss": 0.7728, "step": 1289 }, { "epoch": 0.2490828345240394, "grad_norm": 0.6512240171432495, "learning_rate": 9.981692721554024e-05, "loss": 0.7539, "step": 1290 }, { "epoch": 0.24927592199266269, "grad_norm": 0.4765041470527649, "learning_rate": 9.981548374892927e-05, "loss": 0.6835, "step": 1291 }, { "epoch": 0.24946900946128597, "grad_norm": 0.4749453365802765, "learning_rate": 9.981403462454038e-05, "loss": 0.6786, "step": 1292 }, { "epoch": 0.24966209692990926, "grad_norm": 0.4984728693962097, "learning_rate": 9.981257984253816e-05, "loss": 0.7371, "step": 1293 }, { "epoch": 0.24985518439853255, "grad_norm": 0.5509971976280212, "learning_rate": 9.981111940308784e-05, "loss": 0.6873, "step": 1294 }, { "epoch": 0.25004827186715584, "grad_norm": 0.642442524433136, "learning_rate": 9.980965330635528e-05, "loss": 0.6891, "step": 1295 }, { "epoch": 0.2502413593357791, "grad_norm": 1.9419364929199219, "learning_rate": 9.9808181552507e-05, "loss": 0.6946, "step": 1296 }, { "epoch": 0.2504344468044024, "grad_norm": 1.3618475198745728, "learning_rate": 9.980670414171014e-05, "loss": 0.7027, "step": 1297 }, { "epoch": 0.2506275342730257, "grad_norm": 0.7647218704223633, "learning_rate": 9.980522107413251e-05, "loss": 0.6752, "step": 1298 }, { "epoch": 0.250820621741649, "grad_norm": 0.8694763779640198, "learning_rate": 9.980373234994255e-05, "loss": 0.6761, "step": 1299 }, { "epoch": 0.2510137092102723, "grad_norm": 0.5918258428573608, "learning_rate": 9.980223796930931e-05, "loss": 0.6383, "step": 1300 }, { "epoch": 0.25120679667889556, "grad_norm": 0.43957072496414185, "learning_rate": 9.980073793240258e-05, "loss": 0.6548, "step": 1301 }, { "epoch": 0.25139988414751885, "grad_norm": 1.0097383260726929, "learning_rate": 9.979923223939267e-05, "loss": 0.651, "step": 1302 }, { "epoch": 0.25159297161614214, "grad_norm": 1.0098304748535156, "learning_rate": 9.97977208904506e-05, "loss": 0.6654, "step": 1303 }, { "epoch": 0.2517860590847654, "grad_norm": 0.4656386077404022, "learning_rate": 9.979620388574802e-05, "loss": 0.6384, "step": 1304 }, { "epoch": 0.2519791465533887, "grad_norm": 0.556538462638855, "learning_rate": 9.979468122545723e-05, "loss": 0.6892, "step": 1305 }, { "epoch": 0.252172234022012, "grad_norm": 0.8880836367607117, "learning_rate": 9.979315290975117e-05, "loss": 0.7695, "step": 1306 }, { "epoch": 0.25236532149063523, "grad_norm": 0.4774262011051178, "learning_rate": 9.979161893880342e-05, "loss": 0.6826, "step": 1307 }, { "epoch": 0.2525584089592585, "grad_norm": 0.6986210346221924, "learning_rate": 9.979007931278818e-05, "loss": 0.6395, "step": 1308 }, { "epoch": 0.2527514964278818, "grad_norm": 0.5247126221656799, "learning_rate": 9.978853403188035e-05, "loss": 0.7069, "step": 1309 }, { "epoch": 0.2529445838965051, "grad_norm": 0.5691702365875244, "learning_rate": 9.978698309625539e-05, "loss": 0.733, "step": 1310 }, { "epoch": 0.2531376713651284, "grad_norm": 0.5100505948066711, "learning_rate": 9.978542650608946e-05, "loss": 0.7096, "step": 1311 }, { "epoch": 0.25333075883375167, "grad_norm": 0.7362365126609802, "learning_rate": 9.978386426155936e-05, "loss": 0.7028, "step": 1312 }, { "epoch": 0.25352384630237496, "grad_norm": 0.5717673897743225, "learning_rate": 9.978229636284253e-05, "loss": 0.6837, "step": 1313 }, { "epoch": 0.25371693377099824, "grad_norm": 0.6243453621864319, "learning_rate": 9.978072281011703e-05, "loss": 0.738, "step": 1314 }, { "epoch": 0.25391002123962153, "grad_norm": 0.5313227772712708, "learning_rate": 9.977914360356156e-05, "loss": 0.7645, "step": 1315 }, { "epoch": 0.2541031087082448, "grad_norm": 0.7598880529403687, "learning_rate": 9.977755874335551e-05, "loss": 0.7227, "step": 1316 }, { "epoch": 0.2542961961768681, "grad_norm": 0.6177743673324585, "learning_rate": 9.977596822967886e-05, "loss": 0.6968, "step": 1317 }, { "epoch": 0.2544892836454914, "grad_norm": 0.92793208360672, "learning_rate": 9.977437206271226e-05, "loss": 0.7043, "step": 1318 }, { "epoch": 0.2546823711141147, "grad_norm": 0.5982065796852112, "learning_rate": 9.9772770242637e-05, "loss": 0.7002, "step": 1319 }, { "epoch": 0.25487545858273797, "grad_norm": 0.770797610282898, "learning_rate": 9.977116276963499e-05, "loss": 0.6701, "step": 1320 }, { "epoch": 0.25506854605136126, "grad_norm": 0.7954216599464417, "learning_rate": 9.976954964388879e-05, "loss": 0.6999, "step": 1321 }, { "epoch": 0.25526163351998454, "grad_norm": 0.7413232922554016, "learning_rate": 9.976793086558165e-05, "loss": 0.6921, "step": 1322 }, { "epoch": 0.25545472098860783, "grad_norm": 0.6922728419303894, "learning_rate": 9.976630643489739e-05, "loss": 0.7135, "step": 1323 }, { "epoch": 0.2556478084572311, "grad_norm": 0.731373131275177, "learning_rate": 9.976467635202051e-05, "loss": 0.6782, "step": 1324 }, { "epoch": 0.2558408959258544, "grad_norm": 0.5668162107467651, "learning_rate": 9.976304061713615e-05, "loss": 0.7622, "step": 1325 }, { "epoch": 0.2560339833944777, "grad_norm": 0.6601071953773499, "learning_rate": 9.976139923043008e-05, "loss": 0.6732, "step": 1326 }, { "epoch": 0.256227070863101, "grad_norm": 1.1180938482284546, "learning_rate": 9.975975219208873e-05, "loss": 0.657, "step": 1327 }, { "epoch": 0.25642015833172427, "grad_norm": 0.5974480509757996, "learning_rate": 9.975809950229916e-05, "loss": 0.692, "step": 1328 }, { "epoch": 0.25661324580034756, "grad_norm": 0.7789129614830017, "learning_rate": 9.975644116124908e-05, "loss": 0.6812, "step": 1329 }, { "epoch": 0.25680633326897084, "grad_norm": 0.5676125288009644, "learning_rate": 9.975477716912682e-05, "loss": 0.6464, "step": 1330 }, { "epoch": 0.25699942073759413, "grad_norm": 0.8628600835800171, "learning_rate": 9.975310752612137e-05, "loss": 0.6809, "step": 1331 }, { "epoch": 0.2571925082062174, "grad_norm": 0.7577544450759888, "learning_rate": 9.975143223242238e-05, "loss": 0.6826, "step": 1332 }, { "epoch": 0.2573855956748407, "grad_norm": 0.5411525964736938, "learning_rate": 9.97497512882201e-05, "loss": 0.66, "step": 1333 }, { "epoch": 0.257578683143464, "grad_norm": 0.5680496692657471, "learning_rate": 9.974806469370544e-05, "loss": 0.6922, "step": 1334 }, { "epoch": 0.2577717706120873, "grad_norm": 0.6375073194503784, "learning_rate": 9.974637244906997e-05, "loss": 0.6858, "step": 1335 }, { "epoch": 0.25796485808071057, "grad_norm": 0.5235183238983154, "learning_rate": 9.974467455450587e-05, "loss": 0.6162, "step": 1336 }, { "epoch": 0.25815794554933386, "grad_norm": 0.9989587068557739, "learning_rate": 9.974297101020599e-05, "loss": 0.6896, "step": 1337 }, { "epoch": 0.25835103301795714, "grad_norm": 2.7161777019500732, "learning_rate": 9.97412618163638e-05, "loss": 0.6798, "step": 1338 }, { "epoch": 0.25854412048658043, "grad_norm": 0.8185703754425049, "learning_rate": 9.973954697317345e-05, "loss": 0.7363, "step": 1339 }, { "epoch": 0.2587372079552037, "grad_norm": 0.648260235786438, "learning_rate": 9.973782648082967e-05, "loss": 0.6934, "step": 1340 }, { "epoch": 0.258930295423827, "grad_norm": 0.5706990361213684, "learning_rate": 9.973610033952787e-05, "loss": 0.6239, "step": 1341 }, { "epoch": 0.2591233828924503, "grad_norm": 1.2286550998687744, "learning_rate": 9.973436854946409e-05, "loss": 0.6638, "step": 1342 }, { "epoch": 0.2593164703610736, "grad_norm": 0.6028960943222046, "learning_rate": 9.973263111083503e-05, "loss": 0.6353, "step": 1343 }, { "epoch": 0.25950955782969687, "grad_norm": 1.3245458602905273, "learning_rate": 9.973088802383804e-05, "loss": 0.6326, "step": 1344 }, { "epoch": 0.25970264529832016, "grad_norm": 0.656560480594635, "learning_rate": 9.972913928867104e-05, "loss": 0.6648, "step": 1345 }, { "epoch": 0.25989573276694344, "grad_norm": 0.5707398056983948, "learning_rate": 9.97273849055327e-05, "loss": 0.7281, "step": 1346 }, { "epoch": 0.26008882023556673, "grad_norm": 1.0188860893249512, "learning_rate": 9.972562487462223e-05, "loss": 0.7134, "step": 1347 }, { "epoch": 0.26028190770419, "grad_norm": 0.7281789183616638, "learning_rate": 9.972385919613952e-05, "loss": 0.7018, "step": 1348 }, { "epoch": 0.2604749951728133, "grad_norm": 0.5606681108474731, "learning_rate": 9.972208787028514e-05, "loss": 0.732, "step": 1349 }, { "epoch": 0.2606680826414366, "grad_norm": 0.856863260269165, "learning_rate": 9.972031089726025e-05, "loss": 0.6899, "step": 1350 }, { "epoch": 0.2608611701100599, "grad_norm": 0.6215125322341919, "learning_rate": 9.971852827726667e-05, "loss": 0.6812, "step": 1351 }, { "epoch": 0.26105425757868317, "grad_norm": 0.5692094564437866, "learning_rate": 9.971674001050686e-05, "loss": 0.7095, "step": 1352 }, { "epoch": 0.26124734504730646, "grad_norm": 0.6235180497169495, "learning_rate": 9.971494609718393e-05, "loss": 0.7166, "step": 1353 }, { "epoch": 0.26144043251592974, "grad_norm": 0.7730413675308228, "learning_rate": 9.971314653750161e-05, "loss": 0.6899, "step": 1354 }, { "epoch": 0.261633519984553, "grad_norm": 0.5654771327972412, "learning_rate": 9.971134133166429e-05, "loss": 0.7057, "step": 1355 }, { "epoch": 0.26182660745317626, "grad_norm": 0.6890090107917786, "learning_rate": 9.970953047987702e-05, "loss": 0.7581, "step": 1356 }, { "epoch": 0.26201969492179955, "grad_norm": 0.6057662963867188, "learning_rate": 9.970771398234541e-05, "loss": 0.6199, "step": 1357 }, { "epoch": 0.26221278239042284, "grad_norm": 0.691804051399231, "learning_rate": 9.97058918392758e-05, "loss": 0.715, "step": 1358 }, { "epoch": 0.2624058698590461, "grad_norm": 0.7367483377456665, "learning_rate": 9.970406405087518e-05, "loss": 0.6762, "step": 1359 }, { "epoch": 0.2625989573276694, "grad_norm": 0.8982032537460327, "learning_rate": 9.970223061735106e-05, "loss": 0.7016, "step": 1360 }, { "epoch": 0.2627920447962927, "grad_norm": 0.6093631386756897, "learning_rate": 9.970039153891174e-05, "loss": 0.6828, "step": 1361 }, { "epoch": 0.262985132264916, "grad_norm": 0.7547188997268677, "learning_rate": 9.969854681576607e-05, "loss": 0.7258, "step": 1362 }, { "epoch": 0.2631782197335393, "grad_norm": 0.6059181690216064, "learning_rate": 9.969669644812354e-05, "loss": 0.6454, "step": 1363 }, { "epoch": 0.26337130720216256, "grad_norm": 0.5895715951919556, "learning_rate": 9.969484043619434e-05, "loss": 0.7217, "step": 1364 }, { "epoch": 0.26356439467078585, "grad_norm": 0.5462126731872559, "learning_rate": 9.969297878018924e-05, "loss": 0.6971, "step": 1365 }, { "epoch": 0.26375748213940914, "grad_norm": 1.2025182247161865, "learning_rate": 9.969111148031969e-05, "loss": 0.689, "step": 1366 }, { "epoch": 0.2639505696080324, "grad_norm": 0.6713581681251526, "learning_rate": 9.968923853679776e-05, "loss": 0.6754, "step": 1367 }, { "epoch": 0.2641436570766557, "grad_norm": 0.523093581199646, "learning_rate": 9.96873599498362e-05, "loss": 0.7251, "step": 1368 }, { "epoch": 0.264336744545279, "grad_norm": 0.7093631625175476, "learning_rate": 9.968547571964833e-05, "loss": 0.6877, "step": 1369 }, { "epoch": 0.2645298320139023, "grad_norm": 0.8192763328552246, "learning_rate": 9.968358584644815e-05, "loss": 0.647, "step": 1370 }, { "epoch": 0.2647229194825256, "grad_norm": 0.5965219736099243, "learning_rate": 9.968169033045034e-05, "loss": 0.6595, "step": 1371 }, { "epoch": 0.26491600695114886, "grad_norm": 0.8803829550743103, "learning_rate": 9.967978917187014e-05, "loss": 0.6896, "step": 1372 }, { "epoch": 0.26510909441977215, "grad_norm": 0.8142882585525513, "learning_rate": 9.967788237092351e-05, "loss": 0.6411, "step": 1373 }, { "epoch": 0.26530218188839544, "grad_norm": 0.9102601408958435, "learning_rate": 9.967596992782699e-05, "loss": 0.6378, "step": 1374 }, { "epoch": 0.2654952693570187, "grad_norm": 0.9574174284934998, "learning_rate": 9.967405184279779e-05, "loss": 0.6507, "step": 1375 }, { "epoch": 0.265688356825642, "grad_norm": 0.5317414999008179, "learning_rate": 9.967212811605378e-05, "loss": 0.8118, "step": 1376 }, { "epoch": 0.2658814442942653, "grad_norm": 4.5353827476501465, "learning_rate": 9.96701987478134e-05, "loss": 0.6532, "step": 1377 }, { "epoch": 0.2660745317628886, "grad_norm": 0.506221354007721, "learning_rate": 9.96682637382958e-05, "loss": 0.6856, "step": 1378 }, { "epoch": 0.2662676192315119, "grad_norm": 0.5879683494567871, "learning_rate": 9.966632308772077e-05, "loss": 0.6806, "step": 1379 }, { "epoch": 0.26646070670013516, "grad_norm": 0.8153401613235474, "learning_rate": 9.966437679630869e-05, "loss": 0.6402, "step": 1380 }, { "epoch": 0.26665379416875845, "grad_norm": 0.8994637727737427, "learning_rate": 9.966242486428063e-05, "loss": 0.6982, "step": 1381 }, { "epoch": 0.26684688163738174, "grad_norm": 0.5783627033233643, "learning_rate": 9.966046729185825e-05, "loss": 0.7156, "step": 1382 }, { "epoch": 0.267039969106005, "grad_norm": 0.895794689655304, "learning_rate": 9.965850407926391e-05, "loss": 0.6141, "step": 1383 }, { "epoch": 0.2672330565746283, "grad_norm": 0.7883197665214539, "learning_rate": 9.965653522672058e-05, "loss": 0.7547, "step": 1384 }, { "epoch": 0.2674261440432516, "grad_norm": 0.739843487739563, "learning_rate": 9.965456073445185e-05, "loss": 0.7792, "step": 1385 }, { "epoch": 0.2676192315118749, "grad_norm": 0.7859050035476685, "learning_rate": 9.9652580602682e-05, "loss": 0.7707, "step": 1386 }, { "epoch": 0.2678123189804982, "grad_norm": 0.4574093222618103, "learning_rate": 9.96505948316359e-05, "loss": 0.6631, "step": 1387 }, { "epoch": 0.26800540644912146, "grad_norm": 0.5529264211654663, "learning_rate": 9.964860342153907e-05, "loss": 0.6433, "step": 1388 }, { "epoch": 0.26819849391774475, "grad_norm": 0.47034960985183716, "learning_rate": 9.964660637261774e-05, "loss": 0.658, "step": 1389 }, { "epoch": 0.26839158138636804, "grad_norm": 0.7842029929161072, "learning_rate": 9.964460368509867e-05, "loss": 0.6365, "step": 1390 }, { "epoch": 0.2685846688549913, "grad_norm": 0.5076283812522888, "learning_rate": 9.964259535920934e-05, "loss": 0.7434, "step": 1391 }, { "epoch": 0.2687777563236146, "grad_norm": 0.5134409666061401, "learning_rate": 9.964058139517785e-05, "loss": 0.7105, "step": 1392 }, { "epoch": 0.2689708437922379, "grad_norm": 0.6161268949508667, "learning_rate": 9.96385617932329e-05, "loss": 0.6929, "step": 1393 }, { "epoch": 0.2691639312608612, "grad_norm": 0.4836139678955078, "learning_rate": 9.963653655360391e-05, "loss": 0.7477, "step": 1394 }, { "epoch": 0.2693570187294845, "grad_norm": 0.6604567766189575, "learning_rate": 9.963450567652087e-05, "loss": 0.7231, "step": 1395 }, { "epoch": 0.26955010619810776, "grad_norm": 1.3989616632461548, "learning_rate": 9.963246916221443e-05, "loss": 0.6698, "step": 1396 }, { "epoch": 0.26974319366673105, "grad_norm": 0.8802179098129272, "learning_rate": 9.963042701091592e-05, "loss": 0.6682, "step": 1397 }, { "epoch": 0.26993628113535434, "grad_norm": 0.4423862099647522, "learning_rate": 9.962837922285723e-05, "loss": 0.6901, "step": 1398 }, { "epoch": 0.2701293686039776, "grad_norm": 0.5593369603157043, "learning_rate": 9.962632579827098e-05, "loss": 0.6259, "step": 1399 }, { "epoch": 0.2703224560726009, "grad_norm": 0.581584632396698, "learning_rate": 9.962426673739039e-05, "loss": 0.7532, "step": 1400 }, { "epoch": 0.2705155435412242, "grad_norm": 0.533790647983551, "learning_rate": 9.962220204044925e-05, "loss": 0.6628, "step": 1401 }, { "epoch": 0.27070863100984743, "grad_norm": 0.544889509677887, "learning_rate": 9.962013170768214e-05, "loss": 0.6572, "step": 1402 }, { "epoch": 0.2709017184784707, "grad_norm": 0.5405671000480652, "learning_rate": 9.961805573932415e-05, "loss": 0.7332, "step": 1403 }, { "epoch": 0.271094805947094, "grad_norm": 1.0494540929794312, "learning_rate": 9.961597413561106e-05, "loss": 0.7, "step": 1404 }, { "epoch": 0.2712878934157173, "grad_norm": 0.7577146887779236, "learning_rate": 9.961388689677931e-05, "loss": 0.6674, "step": 1405 }, { "epoch": 0.2714809808843406, "grad_norm": 1.6265240907669067, "learning_rate": 9.961179402306593e-05, "loss": 0.6638, "step": 1406 }, { "epoch": 0.27167406835296387, "grad_norm": 0.785574197769165, "learning_rate": 9.960969551470864e-05, "loss": 0.6661, "step": 1407 }, { "epoch": 0.27186715582158716, "grad_norm": 0.5466613173484802, "learning_rate": 9.960759137194577e-05, "loss": 0.6402, "step": 1408 }, { "epoch": 0.27206024329021045, "grad_norm": 0.5638963580131531, "learning_rate": 9.960548159501629e-05, "loss": 0.6578, "step": 1409 }, { "epoch": 0.27225333075883373, "grad_norm": 0.5901616811752319, "learning_rate": 9.960336618415982e-05, "loss": 0.6797, "step": 1410 }, { "epoch": 0.272446418227457, "grad_norm": 0.6696586608886719, "learning_rate": 9.960124513961662e-05, "loss": 0.6853, "step": 1411 }, { "epoch": 0.2726395056960803, "grad_norm": 0.6556548476219177, "learning_rate": 9.95991184616276e-05, "loss": 0.6919, "step": 1412 }, { "epoch": 0.2728325931647036, "grad_norm": 0.6306750774383545, "learning_rate": 9.959698615043427e-05, "loss": 0.7081, "step": 1413 }, { "epoch": 0.2730256806333269, "grad_norm": 1.0618267059326172, "learning_rate": 9.959484820627884e-05, "loss": 0.7123, "step": 1414 }, { "epoch": 0.27321876810195017, "grad_norm": 0.6119487881660461, "learning_rate": 9.959270462940409e-05, "loss": 0.72, "step": 1415 }, { "epoch": 0.27341185557057346, "grad_norm": 0.7641358971595764, "learning_rate": 9.959055542005348e-05, "loss": 0.6251, "step": 1416 }, { "epoch": 0.27360494303919675, "grad_norm": 0.5446006655693054, "learning_rate": 9.958840057847115e-05, "loss": 0.6929, "step": 1417 }, { "epoch": 0.27379803050782003, "grad_norm": 1.1373965740203857, "learning_rate": 9.958624010490179e-05, "loss": 0.6823, "step": 1418 }, { "epoch": 0.2739911179764433, "grad_norm": 0.6680736541748047, "learning_rate": 9.95840739995908e-05, "loss": 0.6787, "step": 1419 }, { "epoch": 0.2741842054450666, "grad_norm": 0.6378948092460632, "learning_rate": 9.958190226278416e-05, "loss": 0.7207, "step": 1420 }, { "epoch": 0.2743772929136899, "grad_norm": 0.957408607006073, "learning_rate": 9.957972489472857e-05, "loss": 0.6889, "step": 1421 }, { "epoch": 0.2745703803823132, "grad_norm": 0.9005356431007385, "learning_rate": 9.95775418956713e-05, "loss": 0.6956, "step": 1422 }, { "epoch": 0.27476346785093647, "grad_norm": 0.6700565218925476, "learning_rate": 9.957535326586027e-05, "loss": 0.669, "step": 1423 }, { "epoch": 0.27495655531955976, "grad_norm": 0.6054048538208008, "learning_rate": 9.957315900554409e-05, "loss": 0.6542, "step": 1424 }, { "epoch": 0.27514964278818305, "grad_norm": 0.8298527002334595, "learning_rate": 9.957095911497194e-05, "loss": 0.6795, "step": 1425 }, { "epoch": 0.27534273025680633, "grad_norm": 0.8155515193939209, "learning_rate": 9.956875359439368e-05, "loss": 0.7953, "step": 1426 }, { "epoch": 0.2755358177254296, "grad_norm": 0.7301352024078369, "learning_rate": 9.95665424440598e-05, "loss": 0.6495, "step": 1427 }, { "epoch": 0.2757289051940529, "grad_norm": 0.7811538577079773, "learning_rate": 9.956432566422144e-05, "loss": 0.6984, "step": 1428 }, { "epoch": 0.2759219926626762, "grad_norm": 0.6200586557388306, "learning_rate": 9.956210325513038e-05, "loss": 0.6667, "step": 1429 }, { "epoch": 0.2761150801312995, "grad_norm": 0.6823806166648865, "learning_rate": 9.9559875217039e-05, "loss": 0.6561, "step": 1430 }, { "epoch": 0.27630816759992277, "grad_norm": 0.8788081407546997, "learning_rate": 9.955764155020037e-05, "loss": 0.7578, "step": 1431 }, { "epoch": 0.27650125506854606, "grad_norm": 0.7533479332923889, "learning_rate": 9.955540225486816e-05, "loss": 0.6744, "step": 1432 }, { "epoch": 0.27669434253716935, "grad_norm": 0.6601427793502808, "learning_rate": 9.955315733129671e-05, "loss": 0.7591, "step": 1433 }, { "epoch": 0.27688743000579263, "grad_norm": 0.6624122858047485, "learning_rate": 9.9550906779741e-05, "loss": 0.7265, "step": 1434 }, { "epoch": 0.2770805174744159, "grad_norm": 0.614734411239624, "learning_rate": 9.954865060045661e-05, "loss": 0.5963, "step": 1435 }, { "epoch": 0.2772736049430392, "grad_norm": 1.2028932571411133, "learning_rate": 9.95463887936998e-05, "loss": 0.6864, "step": 1436 }, { "epoch": 0.2774666924116625, "grad_norm": 0.8396337032318115, "learning_rate": 9.954412135972744e-05, "loss": 0.7374, "step": 1437 }, { "epoch": 0.2776597798802858, "grad_norm": 0.7080665826797485, "learning_rate": 9.954184829879707e-05, "loss": 0.6848, "step": 1438 }, { "epoch": 0.27785286734890907, "grad_norm": 0.6184977293014526, "learning_rate": 9.953956961116684e-05, "loss": 0.6467, "step": 1439 }, { "epoch": 0.27804595481753236, "grad_norm": 0.9633570909500122, "learning_rate": 9.953728529709557e-05, "loss": 0.65, "step": 1440 }, { "epoch": 0.27823904228615565, "grad_norm": 0.7810633778572083, "learning_rate": 9.953499535684267e-05, "loss": 0.6548, "step": 1441 }, { "epoch": 0.27843212975477893, "grad_norm": 0.9455865025520325, "learning_rate": 9.953269979066824e-05, "loss": 0.6563, "step": 1442 }, { "epoch": 0.2786252172234022, "grad_norm": 0.6648633480072021, "learning_rate": 9.9530398598833e-05, "loss": 0.6183, "step": 1443 }, { "epoch": 0.2788183046920255, "grad_norm": 1.732767939567566, "learning_rate": 9.952809178159829e-05, "loss": 0.6887, "step": 1444 }, { "epoch": 0.2790113921606488, "grad_norm": 17.273216247558594, "learning_rate": 9.952577933922611e-05, "loss": 0.7631, "step": 1445 }, { "epoch": 0.2792044796292721, "grad_norm": 0.6328175067901611, "learning_rate": 9.952346127197913e-05, "loss": 0.6348, "step": 1446 }, { "epoch": 0.27939756709789537, "grad_norm": 0.7442697882652283, "learning_rate": 9.952113758012057e-05, "loss": 0.6596, "step": 1447 }, { "epoch": 0.27959065456651866, "grad_norm": 1.2520458698272705, "learning_rate": 9.951880826391438e-05, "loss": 0.679, "step": 1448 }, { "epoch": 0.27978374203514195, "grad_norm": 0.7634305953979492, "learning_rate": 9.95164733236251e-05, "loss": 0.7227, "step": 1449 }, { "epoch": 0.2799768295037652, "grad_norm": 0.5990433096885681, "learning_rate": 9.951413275951791e-05, "loss": 0.6227, "step": 1450 }, { "epoch": 0.28016991697238847, "grad_norm": 0.5044633746147156, "learning_rate": 9.951178657185866e-05, "loss": 0.6819, "step": 1451 }, { "epoch": 0.28036300444101175, "grad_norm": 3.5847110748291016, "learning_rate": 9.95094347609138e-05, "loss": 0.6772, "step": 1452 }, { "epoch": 0.28055609190963504, "grad_norm": 1.1595584154129028, "learning_rate": 9.950707732695045e-05, "loss": 0.7059, "step": 1453 }, { "epoch": 0.28074917937825833, "grad_norm": 0.7924554944038391, "learning_rate": 9.950471427023633e-05, "loss": 0.6755, "step": 1454 }, { "epoch": 0.2809422668468816, "grad_norm": 0.5602629780769348, "learning_rate": 9.950234559103986e-05, "loss": 0.7219, "step": 1455 }, { "epoch": 0.2811353543155049, "grad_norm": 2.1492114067077637, "learning_rate": 9.949997128963003e-05, "loss": 0.8591, "step": 1456 }, { "epoch": 0.2813284417841282, "grad_norm": 0.7742331624031067, "learning_rate": 9.949759136627652e-05, "loss": 0.7303, "step": 1457 }, { "epoch": 0.2815215292527515, "grad_norm": 0.7405421733856201, "learning_rate": 9.949520582124963e-05, "loss": 0.7431, "step": 1458 }, { "epoch": 0.28171461672137477, "grad_norm": 0.7336825728416443, "learning_rate": 9.949281465482026e-05, "loss": 0.7069, "step": 1459 }, { "epoch": 0.28190770418999805, "grad_norm": 0.574394166469574, "learning_rate": 9.949041786726004e-05, "loss": 0.7326, "step": 1460 }, { "epoch": 0.28210079165862134, "grad_norm": 0.6746364831924438, "learning_rate": 9.948801545884115e-05, "loss": 0.7332, "step": 1461 }, { "epoch": 0.28229387912724463, "grad_norm": 0.6952245235443115, "learning_rate": 9.948560742983645e-05, "loss": 0.6483, "step": 1462 }, { "epoch": 0.2824869665958679, "grad_norm": 0.9660162925720215, "learning_rate": 9.948319378051945e-05, "loss": 0.6634, "step": 1463 }, { "epoch": 0.2826800540644912, "grad_norm": 0.5476135015487671, "learning_rate": 9.948077451116426e-05, "loss": 0.6754, "step": 1464 }, { "epoch": 0.2828731415331145, "grad_norm": 0.7871350646018982, "learning_rate": 9.947834962204564e-05, "loss": 0.6322, "step": 1465 }, { "epoch": 0.2830662290017378, "grad_norm": 1.631295084953308, "learning_rate": 9.947591911343901e-05, "loss": 0.6684, "step": 1466 }, { "epoch": 0.28325931647036107, "grad_norm": 0.5892689228057861, "learning_rate": 9.947348298562041e-05, "loss": 0.7099, "step": 1467 }, { "epoch": 0.28345240393898435, "grad_norm": 0.6763303875923157, "learning_rate": 9.947104123886653e-05, "loss": 0.7067, "step": 1468 }, { "epoch": 0.28364549140760764, "grad_norm": 0.6349692344665527, "learning_rate": 9.946859387345469e-05, "loss": 0.6971, "step": 1469 }, { "epoch": 0.28383857887623093, "grad_norm": 1.7263669967651367, "learning_rate": 9.946614088966282e-05, "loss": 0.6977, "step": 1470 }, { "epoch": 0.2840316663448542, "grad_norm": 0.5548428893089294, "learning_rate": 9.946368228776957e-05, "loss": 0.7179, "step": 1471 }, { "epoch": 0.2842247538134775, "grad_norm": 0.5770214200019836, "learning_rate": 9.946121806805412e-05, "loss": 0.7101, "step": 1472 }, { "epoch": 0.2844178412821008, "grad_norm": 0.6405675411224365, "learning_rate": 9.945874823079637e-05, "loss": 0.6456, "step": 1473 }, { "epoch": 0.2846109287507241, "grad_norm": 0.6856966018676758, "learning_rate": 9.945627277627684e-05, "loss": 0.7443, "step": 1474 }, { "epoch": 0.28480401621934737, "grad_norm": 0.6874160170555115, "learning_rate": 9.945379170477667e-05, "loss": 0.6218, "step": 1475 }, { "epoch": 0.28499710368797065, "grad_norm": 0.6110594272613525, "learning_rate": 9.945130501657764e-05, "loss": 0.6378, "step": 1476 }, { "epoch": 0.28519019115659394, "grad_norm": 1.4408831596374512, "learning_rate": 9.944881271196218e-05, "loss": 0.6499, "step": 1477 }, { "epoch": 0.28538327862521723, "grad_norm": 0.6668723821640015, "learning_rate": 9.944631479121333e-05, "loss": 0.6387, "step": 1478 }, { "epoch": 0.2855763660938405, "grad_norm": 0.5566471815109253, "learning_rate": 9.944381125461484e-05, "loss": 0.7049, "step": 1479 }, { "epoch": 0.2857694535624638, "grad_norm": 0.8047975897789001, "learning_rate": 9.944130210245102e-05, "loss": 0.7214, "step": 1480 }, { "epoch": 0.2859625410310871, "grad_norm": 0.8128103613853455, "learning_rate": 9.943878733500685e-05, "loss": 0.7125, "step": 1481 }, { "epoch": 0.2861556284997104, "grad_norm": 0.830329954624176, "learning_rate": 9.943626695256792e-05, "loss": 0.6893, "step": 1482 }, { "epoch": 0.28634871596833367, "grad_norm": 0.6301575303077698, "learning_rate": 9.943374095542052e-05, "loss": 0.6232, "step": 1483 }, { "epoch": 0.28654180343695695, "grad_norm": 0.727280855178833, "learning_rate": 9.943120934385153e-05, "loss": 0.7515, "step": 1484 }, { "epoch": 0.28673489090558024, "grad_norm": 0.6608421802520752, "learning_rate": 9.942867211814846e-05, "loss": 0.7496, "step": 1485 }, { "epoch": 0.28692797837420353, "grad_norm": 1.5165108442306519, "learning_rate": 9.942612927859947e-05, "loss": 0.6905, "step": 1486 }, { "epoch": 0.2871210658428268, "grad_norm": 1.020954966545105, "learning_rate": 9.942358082549339e-05, "loss": 0.7542, "step": 1487 }, { "epoch": 0.2873141533114501, "grad_norm": 0.7452664375305176, "learning_rate": 9.942102675911964e-05, "loss": 0.6283, "step": 1488 }, { "epoch": 0.2875072407800734, "grad_norm": 1.0430324077606201, "learning_rate": 9.941846707976832e-05, "loss": 0.6996, "step": 1489 }, { "epoch": 0.2877003282486967, "grad_norm": 1.0946365594863892, "learning_rate": 9.94159017877301e-05, "loss": 0.7042, "step": 1490 }, { "epoch": 0.28789341571731997, "grad_norm": 3.0821518898010254, "learning_rate": 9.941333088329638e-05, "loss": 0.7274, "step": 1491 }, { "epoch": 0.28808650318594325, "grad_norm": 1.619691252708435, "learning_rate": 9.941075436675913e-05, "loss": 0.8078, "step": 1492 }, { "epoch": 0.28827959065456654, "grad_norm": 1.1387126445770264, "learning_rate": 9.940817223841097e-05, "loss": 0.662, "step": 1493 }, { "epoch": 0.28847267812318983, "grad_norm": 1.0299104452133179, "learning_rate": 9.940558449854517e-05, "loss": 0.6958, "step": 1494 }, { "epoch": 0.2886657655918131, "grad_norm": 0.8579819798469543, "learning_rate": 9.940299114745563e-05, "loss": 0.7505, "step": 1495 }, { "epoch": 0.2888588530604364, "grad_norm": 1.3560514450073242, "learning_rate": 9.940039218543691e-05, "loss": 0.7365, "step": 1496 }, { "epoch": 0.28905194052905964, "grad_norm": 1.6592836380004883, "learning_rate": 9.939778761278415e-05, "loss": 0.7284, "step": 1497 }, { "epoch": 0.2892450279976829, "grad_norm": 0.7324607372283936, "learning_rate": 9.939517742979321e-05, "loss": 0.6735, "step": 1498 }, { "epoch": 0.2894381154663062, "grad_norm": 0.6870594024658203, "learning_rate": 9.939256163676048e-05, "loss": 0.681, "step": 1499 }, { "epoch": 0.2896312029349295, "grad_norm": 0.9541609287261963, "learning_rate": 9.93899402339831e-05, "loss": 0.6756, "step": 1500 }, { "epoch": 0.2896312029349295, "eval_loss": 0.7307751774787903, "eval_runtime": 49.5295, "eval_samples_per_second": 13.406, "eval_steps_per_second": 0.424, "step": 1500 }, { "epoch": 0.2898242904035528, "grad_norm": 1.5053839683532715, "learning_rate": 9.938731322175875e-05, "loss": 0.7535, "step": 1501 }, { "epoch": 0.2900173778721761, "grad_norm": 0.926396369934082, "learning_rate": 9.938468060038585e-05, "loss": 0.6538, "step": 1502 }, { "epoch": 0.29021046534079936, "grad_norm": 0.6877642273902893, "learning_rate": 9.938204237016336e-05, "loss": 0.6753, "step": 1503 }, { "epoch": 0.29040355280942265, "grad_norm": 0.6196172833442688, "learning_rate": 9.937939853139091e-05, "loss": 0.6584, "step": 1504 }, { "epoch": 0.29059664027804594, "grad_norm": 0.9475682973861694, "learning_rate": 9.937674908436879e-05, "loss": 0.6961, "step": 1505 }, { "epoch": 0.2907897277466692, "grad_norm": 0.8646773099899292, "learning_rate": 9.93740940293979e-05, "loss": 0.6505, "step": 1506 }, { "epoch": 0.2909828152152925, "grad_norm": 0.6893252730369568, "learning_rate": 9.937143336677981e-05, "loss": 0.7355, "step": 1507 }, { "epoch": 0.2911759026839158, "grad_norm": 0.9316985607147217, "learning_rate": 9.936876709681668e-05, "loss": 0.6521, "step": 1508 }, { "epoch": 0.2913689901525391, "grad_norm": 0.8283486366271973, "learning_rate": 9.936609521981132e-05, "loss": 0.6939, "step": 1509 }, { "epoch": 0.2915620776211624, "grad_norm": 1.3804329633712769, "learning_rate": 9.936341773606723e-05, "loss": 0.6628, "step": 1510 }, { "epoch": 0.29175516508978566, "grad_norm": 0.8696216940879822, "learning_rate": 9.936073464588847e-05, "loss": 0.7068, "step": 1511 }, { "epoch": 0.29194825255840895, "grad_norm": 1.080304741859436, "learning_rate": 9.935804594957979e-05, "loss": 0.7368, "step": 1512 }, { "epoch": 0.29214134002703224, "grad_norm": 0.7562654614448547, "learning_rate": 9.935535164744654e-05, "loss": 0.6645, "step": 1513 }, { "epoch": 0.2923344274956555, "grad_norm": 1.66306734085083, "learning_rate": 9.935265173979473e-05, "loss": 0.6894, "step": 1514 }, { "epoch": 0.2925275149642788, "grad_norm": 1.0781782865524292, "learning_rate": 9.934994622693101e-05, "loss": 0.6524, "step": 1515 }, { "epoch": 0.2927206024329021, "grad_norm": 0.6128290295600891, "learning_rate": 9.934723510916266e-05, "loss": 0.7337, "step": 1516 }, { "epoch": 0.2929136899015254, "grad_norm": 1.1916903257369995, "learning_rate": 9.934451838679757e-05, "loss": 0.6617, "step": 1517 }, { "epoch": 0.2931067773701487, "grad_norm": 1.0190160274505615, "learning_rate": 9.934179606014433e-05, "loss": 0.6491, "step": 1518 }, { "epoch": 0.29329986483877196, "grad_norm": 0.7512455582618713, "learning_rate": 9.933906812951207e-05, "loss": 0.6518, "step": 1519 }, { "epoch": 0.29349295230739525, "grad_norm": 0.6122041344642639, "learning_rate": 9.933633459521068e-05, "loss": 0.6595, "step": 1520 }, { "epoch": 0.29368603977601854, "grad_norm": 0.735986590385437, "learning_rate": 9.933359545755058e-05, "loss": 0.6567, "step": 1521 }, { "epoch": 0.2938791272446418, "grad_norm": 0.6592110991477966, "learning_rate": 9.933085071684288e-05, "loss": 0.7336, "step": 1522 }, { "epoch": 0.2940722147132651, "grad_norm": 0.5802565813064575, "learning_rate": 9.93281003733993e-05, "loss": 0.6901, "step": 1523 }, { "epoch": 0.2942653021818884, "grad_norm": 1.0527399778366089, "learning_rate": 9.932534442753222e-05, "loss": 0.6825, "step": 1524 }, { "epoch": 0.2944583896505117, "grad_norm": 0.9868066906929016, "learning_rate": 9.932258287955465e-05, "loss": 0.6212, "step": 1525 }, { "epoch": 0.294651477119135, "grad_norm": 0.5463937520980835, "learning_rate": 9.931981572978022e-05, "loss": 0.727, "step": 1526 }, { "epoch": 0.29484456458775826, "grad_norm": 0.7807517647743225, "learning_rate": 9.931704297852322e-05, "loss": 0.6689, "step": 1527 }, { "epoch": 0.29503765205638155, "grad_norm": 0.6111351847648621, "learning_rate": 9.931426462609855e-05, "loss": 0.6194, "step": 1528 }, { "epoch": 0.29523073952500484, "grad_norm": 0.5371273756027222, "learning_rate": 9.931148067282176e-05, "loss": 0.6737, "step": 1529 }, { "epoch": 0.2954238269936281, "grad_norm": 1.3690134286880493, "learning_rate": 9.930869111900907e-05, "loss": 0.7068, "step": 1530 }, { "epoch": 0.2956169144622514, "grad_norm": 1.3549903631210327, "learning_rate": 9.930589596497726e-05, "loss": 0.6527, "step": 1531 }, { "epoch": 0.2958100019308747, "grad_norm": 0.48728054761886597, "learning_rate": 9.930309521104382e-05, "loss": 0.6916, "step": 1532 }, { "epoch": 0.296003089399498, "grad_norm": 0.6489539742469788, "learning_rate": 9.930028885752682e-05, "loss": 0.6722, "step": 1533 }, { "epoch": 0.2961961768681213, "grad_norm": 0.8167202472686768, "learning_rate": 9.929747690474502e-05, "loss": 0.7011, "step": 1534 }, { "epoch": 0.29638926433674456, "grad_norm": 0.6002077460289001, "learning_rate": 9.929465935301774e-05, "loss": 0.6922, "step": 1535 }, { "epoch": 0.29658235180536785, "grad_norm": 5.422703266143799, "learning_rate": 9.929183620266504e-05, "loss": 0.7398, "step": 1536 }, { "epoch": 0.29677543927399114, "grad_norm": 0.9310723543167114, "learning_rate": 9.928900745400751e-05, "loss": 0.7278, "step": 1537 }, { "epoch": 0.2969685267426144, "grad_norm": 0.5278189778327942, "learning_rate": 9.928617310736645e-05, "loss": 0.6573, "step": 1538 }, { "epoch": 0.2971616142112377, "grad_norm": 0.6085413098335266, "learning_rate": 9.928333316306378e-05, "loss": 0.6916, "step": 1539 }, { "epoch": 0.297354701679861, "grad_norm": 0.5888639092445374, "learning_rate": 9.928048762142201e-05, "loss": 0.6904, "step": 1540 }, { "epoch": 0.2975477891484843, "grad_norm": 0.8633313775062561, "learning_rate": 9.927763648276435e-05, "loss": 0.7011, "step": 1541 }, { "epoch": 0.2977408766171076, "grad_norm": 0.6607233285903931, "learning_rate": 9.927477974741461e-05, "loss": 0.7178, "step": 1542 }, { "epoch": 0.29793396408573086, "grad_norm": 0.7382964491844177, "learning_rate": 9.927191741569724e-05, "loss": 0.6563, "step": 1543 }, { "epoch": 0.29812705155435415, "grad_norm": 0.6117023825645447, "learning_rate": 9.926904948793732e-05, "loss": 0.6828, "step": 1544 }, { "epoch": 0.2983201390229774, "grad_norm": 1.2446112632751465, "learning_rate": 9.92661759644606e-05, "loss": 0.6865, "step": 1545 }, { "epoch": 0.29851322649160067, "grad_norm": 0.8875361084938049, "learning_rate": 9.926329684559341e-05, "loss": 0.6709, "step": 1546 }, { "epoch": 0.29870631396022396, "grad_norm": 0.7592262625694275, "learning_rate": 9.926041213166278e-05, "loss": 0.6898, "step": 1547 }, { "epoch": 0.29889940142884724, "grad_norm": 1.054197072982788, "learning_rate": 9.92575218229963e-05, "loss": 0.6532, "step": 1548 }, { "epoch": 0.29909248889747053, "grad_norm": 0.5965207815170288, "learning_rate": 9.925462591992225e-05, "loss": 0.7001, "step": 1549 }, { "epoch": 0.2992855763660938, "grad_norm": 0.4844602346420288, "learning_rate": 9.925172442276955e-05, "loss": 0.6548, "step": 1550 }, { "epoch": 0.2994786638347171, "grad_norm": 0.5379801988601685, "learning_rate": 9.924881733186772e-05, "loss": 0.7374, "step": 1551 }, { "epoch": 0.2996717513033404, "grad_norm": 0.6723423004150391, "learning_rate": 9.924590464754694e-05, "loss": 0.6384, "step": 1552 }, { "epoch": 0.2998648387719637, "grad_norm": 0.573941171169281, "learning_rate": 9.9242986370138e-05, "loss": 0.7573, "step": 1553 }, { "epoch": 0.30005792624058697, "grad_norm": 0.6437848210334778, "learning_rate": 9.924006249997237e-05, "loss": 0.6907, "step": 1554 }, { "epoch": 0.30025101370921026, "grad_norm": 0.6032239198684692, "learning_rate": 9.92371330373821e-05, "loss": 0.6747, "step": 1555 }, { "epoch": 0.30044410117783354, "grad_norm": 0.7150604128837585, "learning_rate": 9.92341979826999e-05, "loss": 0.7109, "step": 1556 }, { "epoch": 0.30063718864645683, "grad_norm": 0.8771236538887024, "learning_rate": 9.923125733625915e-05, "loss": 0.6727, "step": 1557 }, { "epoch": 0.3008302761150801, "grad_norm": 1.3131420612335205, "learning_rate": 9.922831109839384e-05, "loss": 0.6543, "step": 1558 }, { "epoch": 0.3010233635837034, "grad_norm": 0.8537445664405823, "learning_rate": 9.922535926943853e-05, "loss": 0.6895, "step": 1559 }, { "epoch": 0.3012164510523267, "grad_norm": 0.9495795965194702, "learning_rate": 9.922240184972851e-05, "loss": 0.662, "step": 1560 }, { "epoch": 0.30140953852095, "grad_norm": 1.3337863683700562, "learning_rate": 9.921943883959968e-05, "loss": 0.6982, "step": 1561 }, { "epoch": 0.30160262598957327, "grad_norm": 0.6683966517448425, "learning_rate": 9.921647023938853e-05, "loss": 0.7078, "step": 1562 }, { "epoch": 0.30179571345819656, "grad_norm": 0.5916654467582703, "learning_rate": 9.921349604943224e-05, "loss": 0.6697, "step": 1563 }, { "epoch": 0.30198880092681984, "grad_norm": 0.7468128204345703, "learning_rate": 9.921051627006861e-05, "loss": 0.6831, "step": 1564 }, { "epoch": 0.30218188839544313, "grad_norm": 0.7601511478424072, "learning_rate": 9.920753090163605e-05, "loss": 0.7602, "step": 1565 }, { "epoch": 0.3023749758640664, "grad_norm": 0.6290722489356995, "learning_rate": 9.920453994447363e-05, "loss": 0.6514, "step": 1566 }, { "epoch": 0.3025680633326897, "grad_norm": 0.6184578537940979, "learning_rate": 9.920154339892104e-05, "loss": 0.701, "step": 1567 }, { "epoch": 0.302761150801313, "grad_norm": 0.5959380269050598, "learning_rate": 9.919854126531862e-05, "loss": 0.6117, "step": 1568 }, { "epoch": 0.3029542382699363, "grad_norm": 0.700853705406189, "learning_rate": 9.919553354400733e-05, "loss": 0.6201, "step": 1569 }, { "epoch": 0.30314732573855957, "grad_norm": 0.722135603427887, "learning_rate": 9.919252023532877e-05, "loss": 0.7181, "step": 1570 }, { "epoch": 0.30334041320718286, "grad_norm": 0.8009186387062073, "learning_rate": 9.918950133962518e-05, "loss": 0.6523, "step": 1571 }, { "epoch": 0.30353350067580614, "grad_norm": 0.9202940464019775, "learning_rate": 9.918647685723945e-05, "loss": 0.691, "step": 1572 }, { "epoch": 0.30372658814442943, "grad_norm": 0.9383727312088013, "learning_rate": 9.918344678851505e-05, "loss": 0.7034, "step": 1573 }, { "epoch": 0.3039196756130527, "grad_norm": 2.890355110168457, "learning_rate": 9.918041113379614e-05, "loss": 0.7377, "step": 1574 }, { "epoch": 0.304112763081676, "grad_norm": 0.7332344651222229, "learning_rate": 9.917736989342747e-05, "loss": 0.7133, "step": 1575 }, { "epoch": 0.3043058505502993, "grad_norm": 1.503989338874817, "learning_rate": 9.917432306775448e-05, "loss": 0.6917, "step": 1576 }, { "epoch": 0.3044989380189226, "grad_norm": 1.9535536766052246, "learning_rate": 9.91712706571232e-05, "loss": 0.7221, "step": 1577 }, { "epoch": 0.30469202548754587, "grad_norm": 0.7404228448867798, "learning_rate": 9.91682126618803e-05, "loss": 0.6993, "step": 1578 }, { "epoch": 0.30488511295616916, "grad_norm": 0.8305640816688538, "learning_rate": 9.916514908237309e-05, "loss": 0.6829, "step": 1579 }, { "epoch": 0.30507820042479245, "grad_norm": 0.773774266242981, "learning_rate": 9.916207991894952e-05, "loss": 0.6615, "step": 1580 }, { "epoch": 0.30527128789341573, "grad_norm": 0.8955283761024475, "learning_rate": 9.915900517195818e-05, "loss": 0.7182, "step": 1581 }, { "epoch": 0.305464375362039, "grad_norm": 0.8517575263977051, "learning_rate": 9.915592484174826e-05, "loss": 0.6337, "step": 1582 }, { "epoch": 0.3056574628306623, "grad_norm": 0.6925287246704102, "learning_rate": 9.915283892866962e-05, "loss": 0.6821, "step": 1583 }, { "epoch": 0.3058505502992856, "grad_norm": 0.769721269607544, "learning_rate": 9.914974743307275e-05, "loss": 0.6568, "step": 1584 }, { "epoch": 0.3060436377679089, "grad_norm": 1.4187017679214478, "learning_rate": 9.914665035530875e-05, "loss": 0.7817, "step": 1585 }, { "epoch": 0.30623672523653217, "grad_norm": 0.9568469524383545, "learning_rate": 9.914354769572938e-05, "loss": 0.7198, "step": 1586 }, { "epoch": 0.30642981270515546, "grad_norm": 0.7729285359382629, "learning_rate": 9.914043945468703e-05, "loss": 0.6828, "step": 1587 }, { "epoch": 0.30662290017377875, "grad_norm": 1.7016915082931519, "learning_rate": 9.91373256325347e-05, "loss": 0.6831, "step": 1588 }, { "epoch": 0.30681598764240203, "grad_norm": 0.8237716555595398, "learning_rate": 9.913420622962606e-05, "loss": 0.6518, "step": 1589 }, { "epoch": 0.3070090751110253, "grad_norm": 1.369483232498169, "learning_rate": 9.913108124631536e-05, "loss": 0.7211, "step": 1590 }, { "epoch": 0.3072021625796486, "grad_norm": 1.976918339729309, "learning_rate": 9.912795068295757e-05, "loss": 0.7125, "step": 1591 }, { "epoch": 0.30739525004827184, "grad_norm": 0.9048042893409729, "learning_rate": 9.91248145399082e-05, "loss": 0.6915, "step": 1592 }, { "epoch": 0.3075883375168951, "grad_norm": 0.8587135672569275, "learning_rate": 9.912167281752346e-05, "loss": 0.7172, "step": 1593 }, { "epoch": 0.3077814249855184, "grad_norm": 1.0033406019210815, "learning_rate": 9.911852551616017e-05, "loss": 0.6862, "step": 1594 }, { "epoch": 0.3079745124541417, "grad_norm": 1.096878170967102, "learning_rate": 9.911537263617576e-05, "loss": 0.6339, "step": 1595 }, { "epoch": 0.308167599922765, "grad_norm": 1.4640700817108154, "learning_rate": 9.911221417792833e-05, "loss": 0.7057, "step": 1596 }, { "epoch": 0.3083606873913883, "grad_norm": 1.171861171722412, "learning_rate": 9.910905014177662e-05, "loss": 0.652, "step": 1597 }, { "epoch": 0.30855377486001156, "grad_norm": 1.1176908016204834, "learning_rate": 9.910588052807997e-05, "loss": 0.6884, "step": 1598 }, { "epoch": 0.30874686232863485, "grad_norm": 1.0346368551254272, "learning_rate": 9.910270533719837e-05, "loss": 0.7059, "step": 1599 }, { "epoch": 0.30893994979725814, "grad_norm": 1.2436140775680542, "learning_rate": 9.909952456949243e-05, "loss": 0.7039, "step": 1600 }, { "epoch": 0.3091330372658814, "grad_norm": 1.0860390663146973, "learning_rate": 9.909633822532341e-05, "loss": 0.6426, "step": 1601 }, { "epoch": 0.3093261247345047, "grad_norm": 1.5744637250900269, "learning_rate": 9.909314630505322e-05, "loss": 0.6482, "step": 1602 }, { "epoch": 0.309519212203128, "grad_norm": 1.1047474145889282, "learning_rate": 9.908994880904434e-05, "loss": 0.7186, "step": 1603 }, { "epoch": 0.3097122996717513, "grad_norm": 1.0788202285766602, "learning_rate": 9.908674573765996e-05, "loss": 0.6739, "step": 1604 }, { "epoch": 0.3099053871403746, "grad_norm": 1.797585368156433, "learning_rate": 9.908353709126385e-05, "loss": 0.6415, "step": 1605 }, { "epoch": 0.31009847460899786, "grad_norm": 1.0016896724700928, "learning_rate": 9.908032287022045e-05, "loss": 0.6395, "step": 1606 }, { "epoch": 0.31029156207762115, "grad_norm": 1.2296042442321777, "learning_rate": 9.907710307489477e-05, "loss": 0.7274, "step": 1607 }, { "epoch": 0.31048464954624444, "grad_norm": 1.1615853309631348, "learning_rate": 9.907387770565255e-05, "loss": 0.7204, "step": 1608 }, { "epoch": 0.3106777370148677, "grad_norm": 1.0151056051254272, "learning_rate": 9.907064676286009e-05, "loss": 0.7167, "step": 1609 }, { "epoch": 0.310870824483491, "grad_norm": 1.7109456062316895, "learning_rate": 9.906741024688433e-05, "loss": 0.6396, "step": 1610 }, { "epoch": 0.3110639119521143, "grad_norm": 1.3216086626052856, "learning_rate": 9.906416815809288e-05, "loss": 0.6948, "step": 1611 }, { "epoch": 0.3112569994207376, "grad_norm": 0.8921527862548828, "learning_rate": 9.906092049685394e-05, "loss": 0.7072, "step": 1612 }, { "epoch": 0.3114500868893609, "grad_norm": 1.196081280708313, "learning_rate": 9.905766726353637e-05, "loss": 0.6508, "step": 1613 }, { "epoch": 0.31164317435798417, "grad_norm": 1.6827219724655151, "learning_rate": 9.905440845850965e-05, "loss": 0.6697, "step": 1614 }, { "epoch": 0.31183626182660745, "grad_norm": 1.877524971961975, "learning_rate": 9.90511440821439e-05, "loss": 0.7226, "step": 1615 }, { "epoch": 0.31202934929523074, "grad_norm": 1.0410016775131226, "learning_rate": 9.904787413480988e-05, "loss": 0.761, "step": 1616 }, { "epoch": 0.312222436763854, "grad_norm": 0.9307422041893005, "learning_rate": 9.904459861687896e-05, "loss": 0.6826, "step": 1617 }, { "epoch": 0.3124155242324773, "grad_norm": 1.1085429191589355, "learning_rate": 9.904131752872317e-05, "loss": 0.73, "step": 1618 }, { "epoch": 0.3126086117011006, "grad_norm": 2.253676176071167, "learning_rate": 9.903803087071513e-05, "loss": 0.7202, "step": 1619 }, { "epoch": 0.3128016991697239, "grad_norm": 1.0928741693496704, "learning_rate": 9.903473864322817e-05, "loss": 0.6408, "step": 1620 }, { "epoch": 0.3129947866383472, "grad_norm": 0.9820628762245178, "learning_rate": 9.903144084663615e-05, "loss": 0.6958, "step": 1621 }, { "epoch": 0.31318787410697047, "grad_norm": 0.8378533720970154, "learning_rate": 9.902813748131365e-05, "loss": 0.7356, "step": 1622 }, { "epoch": 0.31338096157559375, "grad_norm": 0.9157654047012329, "learning_rate": 9.902482854763583e-05, "loss": 0.6873, "step": 1623 }, { "epoch": 0.31357404904421704, "grad_norm": 1.1834144592285156, "learning_rate": 9.902151404597853e-05, "loss": 0.6244, "step": 1624 }, { "epoch": 0.31376713651284033, "grad_norm": 0.8272839188575745, "learning_rate": 9.901819397671817e-05, "loss": 0.6676, "step": 1625 }, { "epoch": 0.3139602239814636, "grad_norm": 3.3842995166778564, "learning_rate": 9.901486834023182e-05, "loss": 0.6754, "step": 1626 }, { "epoch": 0.3141533114500869, "grad_norm": 0.7582522630691528, "learning_rate": 9.90115371368972e-05, "loss": 0.7255, "step": 1627 }, { "epoch": 0.3143463989187102, "grad_norm": 4.317686080932617, "learning_rate": 9.900820036709264e-05, "loss": 0.7607, "step": 1628 }, { "epoch": 0.3145394863873335, "grad_norm": 1.37778902053833, "learning_rate": 9.900485803119713e-05, "loss": 0.6244, "step": 1629 }, { "epoch": 0.31473257385595677, "grad_norm": 2.3666324615478516, "learning_rate": 9.900151012959027e-05, "loss": 0.7736, "step": 1630 }, { "epoch": 0.31492566132458005, "grad_norm": 1.0466644763946533, "learning_rate": 9.899815666265228e-05, "loss": 0.6614, "step": 1631 }, { "epoch": 0.31511874879320334, "grad_norm": 1.1173762083053589, "learning_rate": 9.899479763076406e-05, "loss": 0.6792, "step": 1632 }, { "epoch": 0.31531183626182663, "grad_norm": 0.8083997368812561, "learning_rate": 9.899143303430708e-05, "loss": 0.7183, "step": 1633 }, { "epoch": 0.3155049237304499, "grad_norm": 0.7899964451789856, "learning_rate": 9.898806287366349e-05, "loss": 0.7029, "step": 1634 }, { "epoch": 0.3156980111990732, "grad_norm": 0.7082598805427551, "learning_rate": 9.898468714921605e-05, "loss": 0.6501, "step": 1635 }, { "epoch": 0.3158910986676965, "grad_norm": 2.331373929977417, "learning_rate": 9.898130586134815e-05, "loss": 0.6658, "step": 1636 }, { "epoch": 0.3160841861363198, "grad_norm": 1.1984397172927856, "learning_rate": 9.897791901044383e-05, "loss": 0.7291, "step": 1637 }, { "epoch": 0.31627727360494307, "grad_norm": 0.6284834742546082, "learning_rate": 9.897452659688775e-05, "loss": 0.6812, "step": 1638 }, { "epoch": 0.31647036107356635, "grad_norm": 0.9638805985450745, "learning_rate": 9.89711286210652e-05, "loss": 0.6858, "step": 1639 }, { "epoch": 0.3166634485421896, "grad_norm": 0.9418836236000061, "learning_rate": 9.896772508336209e-05, "loss": 0.6677, "step": 1640 }, { "epoch": 0.3168565360108129, "grad_norm": 0.5739036202430725, "learning_rate": 9.8964315984165e-05, "loss": 0.7428, "step": 1641 }, { "epoch": 0.31704962347943616, "grad_norm": 0.9050430655479431, "learning_rate": 9.896090132386112e-05, "loss": 0.7062, "step": 1642 }, { "epoch": 0.31724271094805945, "grad_norm": 0.5681109428405762, "learning_rate": 9.895748110283824e-05, "loss": 0.6748, "step": 1643 }, { "epoch": 0.31743579841668274, "grad_norm": 0.859761118888855, "learning_rate": 9.895405532148482e-05, "loss": 0.6592, "step": 1644 }, { "epoch": 0.317628885885306, "grad_norm": 0.7384536862373352, "learning_rate": 9.895062398018995e-05, "loss": 0.663, "step": 1645 }, { "epoch": 0.3178219733539293, "grad_norm": 0.8740972876548767, "learning_rate": 9.894718707934334e-05, "loss": 0.7079, "step": 1646 }, { "epoch": 0.3180150608225526, "grad_norm": 0.8946799039840698, "learning_rate": 9.894374461933534e-05, "loss": 0.6194, "step": 1647 }, { "epoch": 0.3182081482911759, "grad_norm": 1.0615233182907104, "learning_rate": 9.894029660055692e-05, "loss": 0.6907, "step": 1648 }, { "epoch": 0.3184012357597992, "grad_norm": 0.8695628046989441, "learning_rate": 9.89368430233997e-05, "loss": 0.6612, "step": 1649 }, { "epoch": 0.31859432322842246, "grad_norm": 0.7260878682136536, "learning_rate": 9.893338388825591e-05, "loss": 0.8208, "step": 1650 }, { "epoch": 0.31878741069704575, "grad_norm": 0.9157645106315613, "learning_rate": 9.89299191955184e-05, "loss": 0.6729, "step": 1651 }, { "epoch": 0.31898049816566904, "grad_norm": 0.9149498343467712, "learning_rate": 9.89264489455807e-05, "loss": 0.698, "step": 1652 }, { "epoch": 0.3191735856342923, "grad_norm": 1.0366286039352417, "learning_rate": 9.892297313883694e-05, "loss": 0.6143, "step": 1653 }, { "epoch": 0.3193666731029156, "grad_norm": 1.0167940855026245, "learning_rate": 9.891949177568186e-05, "loss": 0.6722, "step": 1654 }, { "epoch": 0.3195597605715389, "grad_norm": 0.7731537222862244, "learning_rate": 9.891600485651088e-05, "loss": 0.6758, "step": 1655 }, { "epoch": 0.3197528480401622, "grad_norm": 0.983245313167572, "learning_rate": 9.891251238172001e-05, "loss": 0.6814, "step": 1656 }, { "epoch": 0.3199459355087855, "grad_norm": 0.6638950109481812, "learning_rate": 9.890901435170592e-05, "loss": 0.6487, "step": 1657 }, { "epoch": 0.32013902297740876, "grad_norm": 1.7255228757858276, "learning_rate": 9.890551076686591e-05, "loss": 0.7071, "step": 1658 }, { "epoch": 0.32033211044603205, "grad_norm": 3.319305419921875, "learning_rate": 9.890200162759786e-05, "loss": 0.6975, "step": 1659 }, { "epoch": 0.32052519791465534, "grad_norm": 1.7485119104385376, "learning_rate": 9.889848693430035e-05, "loss": 0.7086, "step": 1660 }, { "epoch": 0.3207182853832786, "grad_norm": 0.717183530330658, "learning_rate": 9.889496668737253e-05, "loss": 0.6627, "step": 1661 }, { "epoch": 0.3209113728519019, "grad_norm": 0.6845531463623047, "learning_rate": 9.889144088721425e-05, "loss": 0.6947, "step": 1662 }, { "epoch": 0.3211044603205252, "grad_norm": 1.3403445482254028, "learning_rate": 9.888790953422593e-05, "loss": 0.6219, "step": 1663 }, { "epoch": 0.3212975477891485, "grad_norm": 0.7731978297233582, "learning_rate": 9.888437262880863e-05, "loss": 0.7055, "step": 1664 }, { "epoch": 0.3214906352577718, "grad_norm": 0.612545371055603, "learning_rate": 9.888083017136408e-05, "loss": 0.6025, "step": 1665 }, { "epoch": 0.32168372272639506, "grad_norm": 0.8786667585372925, "learning_rate": 9.88772821622946e-05, "loss": 0.7006, "step": 1666 }, { "epoch": 0.32187681019501835, "grad_norm": 0.6530813574790955, "learning_rate": 9.887372860200315e-05, "loss": 0.7167, "step": 1667 }, { "epoch": 0.32206989766364164, "grad_norm": 0.765361487865448, "learning_rate": 9.887016949089333e-05, "loss": 0.7085, "step": 1668 }, { "epoch": 0.3222629851322649, "grad_norm": 0.67545485496521, "learning_rate": 9.886660482936938e-05, "loss": 0.754, "step": 1669 }, { "epoch": 0.3224560726008882, "grad_norm": 0.8792630434036255, "learning_rate": 9.886303461783612e-05, "loss": 0.6484, "step": 1670 }, { "epoch": 0.3226491600695115, "grad_norm": 1.8984336853027344, "learning_rate": 9.885945885669907e-05, "loss": 0.6642, "step": 1671 }, { "epoch": 0.3228422475381348, "grad_norm": 0.930640459060669, "learning_rate": 9.885587754636431e-05, "loss": 0.7009, "step": 1672 }, { "epoch": 0.3230353350067581, "grad_norm": 0.9277173280715942, "learning_rate": 9.885229068723862e-05, "loss": 0.646, "step": 1673 }, { "epoch": 0.32322842247538136, "grad_norm": 0.5955201983451843, "learning_rate": 9.884869827972938e-05, "loss": 0.6296, "step": 1674 }, { "epoch": 0.32342150994400465, "grad_norm": 0.5515310764312744, "learning_rate": 9.884510032424456e-05, "loss": 0.6532, "step": 1675 }, { "epoch": 0.32361459741262794, "grad_norm": 0.6612389087677002, "learning_rate": 9.884149682119281e-05, "loss": 0.698, "step": 1676 }, { "epoch": 0.3238076848812512, "grad_norm": 0.6811112761497498, "learning_rate": 9.883788777098342e-05, "loss": 0.7347, "step": 1677 }, { "epoch": 0.3240007723498745, "grad_norm": 1.742127776145935, "learning_rate": 9.883427317402627e-05, "loss": 0.6838, "step": 1678 }, { "epoch": 0.3241938598184978, "grad_norm": 0.575971782207489, "learning_rate": 9.88306530307319e-05, "loss": 0.6383, "step": 1679 }, { "epoch": 0.3243869472871211, "grad_norm": 0.7284858226776123, "learning_rate": 9.882702734151143e-05, "loss": 0.6693, "step": 1680 }, { "epoch": 0.3245800347557444, "grad_norm": 0.5456998944282532, "learning_rate": 9.882339610677666e-05, "loss": 0.6415, "step": 1681 }, { "epoch": 0.32477312222436766, "grad_norm": 0.6699059009552002, "learning_rate": 9.881975932694005e-05, "loss": 0.7341, "step": 1682 }, { "epoch": 0.32496620969299095, "grad_norm": 0.6753578186035156, "learning_rate": 9.88161170024146e-05, "loss": 0.7164, "step": 1683 }, { "epoch": 0.32515929716161424, "grad_norm": 0.6410447955131531, "learning_rate": 9.8812469133614e-05, "loss": 0.6979, "step": 1684 }, { "epoch": 0.3253523846302375, "grad_norm": 0.8800226449966431, "learning_rate": 9.880881572095256e-05, "loss": 0.6588, "step": 1685 }, { "epoch": 0.3255454720988608, "grad_norm": 1.2317909002304077, "learning_rate": 9.880515676484519e-05, "loss": 0.6792, "step": 1686 }, { "epoch": 0.32573855956748404, "grad_norm": 0.6075188517570496, "learning_rate": 9.880149226570749e-05, "loss": 0.6859, "step": 1687 }, { "epoch": 0.32593164703610733, "grad_norm": 0.5194687843322754, "learning_rate": 9.879782222395562e-05, "loss": 0.6737, "step": 1688 }, { "epoch": 0.3261247345047306, "grad_norm": 0.5651945471763611, "learning_rate": 9.879414664000643e-05, "loss": 0.6979, "step": 1689 }, { "epoch": 0.3263178219733539, "grad_norm": 0.6808919906616211, "learning_rate": 9.879046551427739e-05, "loss": 0.7447, "step": 1690 }, { "epoch": 0.3265109094419772, "grad_norm": 0.6186671853065491, "learning_rate": 9.878677884718653e-05, "loss": 0.6744, "step": 1691 }, { "epoch": 0.3267039969106005, "grad_norm": 0.5697963237762451, "learning_rate": 9.87830866391526e-05, "loss": 0.6674, "step": 1692 }, { "epoch": 0.32689708437922377, "grad_norm": 0.6331979632377625, "learning_rate": 9.877938889059494e-05, "loss": 0.6947, "step": 1693 }, { "epoch": 0.32709017184784706, "grad_norm": 0.5839349031448364, "learning_rate": 9.87756856019335e-05, "loss": 0.7058, "step": 1694 }, { "epoch": 0.32728325931647034, "grad_norm": 2.158968210220337, "learning_rate": 9.87719767735889e-05, "loss": 0.6813, "step": 1695 }, { "epoch": 0.32747634678509363, "grad_norm": 0.8596954345703125, "learning_rate": 9.876826240598236e-05, "loss": 0.6534, "step": 1696 }, { "epoch": 0.3276694342537169, "grad_norm": 0.593704104423523, "learning_rate": 9.876454249953574e-05, "loss": 0.6626, "step": 1697 }, { "epoch": 0.3278625217223402, "grad_norm": 1.1740877628326416, "learning_rate": 9.876081705467153e-05, "loss": 0.6473, "step": 1698 }, { "epoch": 0.3280556091909635, "grad_norm": 0.7209458351135254, "learning_rate": 9.875708607181283e-05, "loss": 0.7398, "step": 1699 }, { "epoch": 0.3282486966595868, "grad_norm": 0.5210352540016174, "learning_rate": 9.875334955138341e-05, "loss": 0.741, "step": 1700 }, { "epoch": 0.32844178412821007, "grad_norm": 1.8631007671356201, "learning_rate": 9.874960749380763e-05, "loss": 0.7102, "step": 1701 }, { "epoch": 0.32863487159683336, "grad_norm": 0.9118615984916687, "learning_rate": 9.87458598995105e-05, "loss": 0.6779, "step": 1702 }, { "epoch": 0.32882795906545664, "grad_norm": 1.8163425922393799, "learning_rate": 9.874210676891764e-05, "loss": 0.6583, "step": 1703 }, { "epoch": 0.32902104653407993, "grad_norm": 0.484282910823822, "learning_rate": 9.873834810245531e-05, "loss": 0.6537, "step": 1704 }, { "epoch": 0.3292141340027032, "grad_norm": 0.6745563745498657, "learning_rate": 9.873458390055041e-05, "loss": 0.69, "step": 1705 }, { "epoch": 0.3294072214713265, "grad_norm": 0.5186164975166321, "learning_rate": 9.873081416363047e-05, "loss": 0.6341, "step": 1706 }, { "epoch": 0.3296003089399498, "grad_norm": 0.7042735815048218, "learning_rate": 9.872703889212362e-05, "loss": 0.6209, "step": 1707 }, { "epoch": 0.3297933964085731, "grad_norm": 0.75445955991745, "learning_rate": 9.872325808645863e-05, "loss": 0.6919, "step": 1708 }, { "epoch": 0.32998648387719637, "grad_norm": 0.8321040868759155, "learning_rate": 9.87194717470649e-05, "loss": 0.6849, "step": 1709 }, { "epoch": 0.33017957134581966, "grad_norm": 0.7877038717269897, "learning_rate": 9.871567987437249e-05, "loss": 0.7212, "step": 1710 }, { "epoch": 0.33037265881444294, "grad_norm": 1.0783828496932983, "learning_rate": 9.871188246881204e-05, "loss": 0.6826, "step": 1711 }, { "epoch": 0.33056574628306623, "grad_norm": 0.6051924824714661, "learning_rate": 9.870807953081484e-05, "loss": 0.7605, "step": 1712 }, { "epoch": 0.3307588337516895, "grad_norm": 1.050746202468872, "learning_rate": 9.870427106081282e-05, "loss": 0.6363, "step": 1713 }, { "epoch": 0.3309519212203128, "grad_norm": 0.47587838768959045, "learning_rate": 9.87004570592385e-05, "loss": 0.6195, "step": 1714 }, { "epoch": 0.3311450086889361, "grad_norm": 0.6732697486877441, "learning_rate": 9.869663752652508e-05, "loss": 0.7024, "step": 1715 }, { "epoch": 0.3313380961575594, "grad_norm": 0.6730125546455383, "learning_rate": 9.869281246310635e-05, "loss": 0.7567, "step": 1716 }, { "epoch": 0.33153118362618267, "grad_norm": 0.6224210262298584, "learning_rate": 9.868898186941675e-05, "loss": 0.6714, "step": 1717 }, { "epoch": 0.33172427109480596, "grad_norm": 0.8791462779045105, "learning_rate": 9.868514574589133e-05, "loss": 0.6544, "step": 1718 }, { "epoch": 0.33191735856342924, "grad_norm": 0.6032392978668213, "learning_rate": 9.868130409296576e-05, "loss": 0.6645, "step": 1719 }, { "epoch": 0.33211044603205253, "grad_norm": 0.7021769881248474, "learning_rate": 9.867745691107639e-05, "loss": 0.7195, "step": 1720 }, { "epoch": 0.3323035335006758, "grad_norm": 0.6592163443565369, "learning_rate": 9.867360420066015e-05, "loss": 0.7001, "step": 1721 }, { "epoch": 0.3324966209692991, "grad_norm": 1.012800693511963, "learning_rate": 9.866974596215461e-05, "loss": 0.6991, "step": 1722 }, { "epoch": 0.3326897084379224, "grad_norm": 0.9661222100257874, "learning_rate": 9.866588219599795e-05, "loss": 0.6664, "step": 1723 }, { "epoch": 0.3328827959065457, "grad_norm": 0.7440906763076782, "learning_rate": 9.8662012902629e-05, "loss": 0.7009, "step": 1724 }, { "epoch": 0.33307588337516897, "grad_norm": 0.7060573697090149, "learning_rate": 9.865813808248723e-05, "loss": 0.6344, "step": 1725 }, { "epoch": 0.33326897084379226, "grad_norm": 0.5428657531738281, "learning_rate": 9.865425773601273e-05, "loss": 0.6933, "step": 1726 }, { "epoch": 0.33346205831241554, "grad_norm": 0.5777361392974854, "learning_rate": 9.865037186364618e-05, "loss": 0.6741, "step": 1727 }, { "epoch": 0.33365514578103883, "grad_norm": 0.5709802508354187, "learning_rate": 9.864648046582894e-05, "loss": 0.6866, "step": 1728 }, { "epoch": 0.3338482332496621, "grad_norm": 0.6185213327407837, "learning_rate": 9.864258354300295e-05, "loss": 0.695, "step": 1729 }, { "epoch": 0.3340413207182854, "grad_norm": 0.7318896055221558, "learning_rate": 9.863868109561083e-05, "loss": 0.7044, "step": 1730 }, { "epoch": 0.3342344081869087, "grad_norm": 0.9932767748832703, "learning_rate": 9.86347731240958e-05, "loss": 0.6988, "step": 1731 }, { "epoch": 0.334427495655532, "grad_norm": 0.6477053165435791, "learning_rate": 9.863085962890167e-05, "loss": 0.6884, "step": 1732 }, { "epoch": 0.33462058312415527, "grad_norm": 0.7972424030303955, "learning_rate": 9.862694061047293e-05, "loss": 0.6911, "step": 1733 }, { "epoch": 0.33481367059277856, "grad_norm": 0.7017980217933655, "learning_rate": 9.86230160692547e-05, "loss": 0.6823, "step": 1734 }, { "epoch": 0.3350067580614018, "grad_norm": 0.6794313788414001, "learning_rate": 9.861908600569269e-05, "loss": 0.6847, "step": 1735 }, { "epoch": 0.3351998455300251, "grad_norm": 0.7495216727256775, "learning_rate": 9.861515042023328e-05, "loss": 0.7198, "step": 1736 }, { "epoch": 0.33539293299864836, "grad_norm": 0.9119370579719543, "learning_rate": 9.861120931332342e-05, "loss": 0.7366, "step": 1737 }, { "epoch": 0.33558602046727165, "grad_norm": 1.3177225589752197, "learning_rate": 9.860726268541074e-05, "loss": 0.6105, "step": 1738 }, { "epoch": 0.33577910793589494, "grad_norm": 0.8676249384880066, "learning_rate": 9.860331053694347e-05, "loss": 0.6899, "step": 1739 }, { "epoch": 0.3359721954045182, "grad_norm": 0.557127833366394, "learning_rate": 9.859935286837049e-05, "loss": 0.6764, "step": 1740 }, { "epoch": 0.3361652828731415, "grad_norm": 1.0318857431411743, "learning_rate": 9.859538968014126e-05, "loss": 0.7114, "step": 1741 }, { "epoch": 0.3363583703417648, "grad_norm": 2.4666261672973633, "learning_rate": 9.859142097270593e-05, "loss": 0.7293, "step": 1742 }, { "epoch": 0.3365514578103881, "grad_norm": 1.2592284679412842, "learning_rate": 9.858744674651521e-05, "loss": 0.7619, "step": 1743 }, { "epoch": 0.3367445452790114, "grad_norm": 0.7642198204994202, "learning_rate": 9.85834670020205e-05, "loss": 0.6734, "step": 1744 }, { "epoch": 0.33693763274763466, "grad_norm": 0.9903820753097534, "learning_rate": 9.857948173967379e-05, "loss": 0.7402, "step": 1745 }, { "epoch": 0.33713072021625795, "grad_norm": 0.5577307343482971, "learning_rate": 9.857549095992771e-05, "loss": 0.649, "step": 1746 }, { "epoch": 0.33732380768488124, "grad_norm": 0.5752320289611816, "learning_rate": 9.857149466323549e-05, "loss": 0.6972, "step": 1747 }, { "epoch": 0.3375168951535045, "grad_norm": 0.7904291749000549, "learning_rate": 9.856749285005105e-05, "loss": 0.7145, "step": 1748 }, { "epoch": 0.3377099826221278, "grad_norm": 0.9451952576637268, "learning_rate": 9.856348552082886e-05, "loss": 0.7116, "step": 1749 }, { "epoch": 0.3379030700907511, "grad_norm": 0.5308854579925537, "learning_rate": 9.855947267602404e-05, "loss": 0.6664, "step": 1750 }, { "epoch": 0.3380961575593744, "grad_norm": 0.7119319438934326, "learning_rate": 9.855545431609239e-05, "loss": 0.7483, "step": 1751 }, { "epoch": 0.3382892450279977, "grad_norm": 0.7412059903144836, "learning_rate": 9.855143044149027e-05, "loss": 0.6632, "step": 1752 }, { "epoch": 0.33848233249662096, "grad_norm": 0.8047969937324524, "learning_rate": 9.854740105267468e-05, "loss": 0.6874, "step": 1753 }, { "epoch": 0.33867541996524425, "grad_norm": 0.4834648072719574, "learning_rate": 9.854336615010326e-05, "loss": 0.6982, "step": 1754 }, { "epoch": 0.33886850743386754, "grad_norm": 0.7358660697937012, "learning_rate": 9.85393257342343e-05, "loss": 0.643, "step": 1755 }, { "epoch": 0.3390615949024908, "grad_norm": 1.2748271226882935, "learning_rate": 9.853527980552665e-05, "loss": 0.7049, "step": 1756 }, { "epoch": 0.3392546823711141, "grad_norm": 0.7970848679542542, "learning_rate": 9.853122836443988e-05, "loss": 0.6201, "step": 1757 }, { "epoch": 0.3394477698397374, "grad_norm": 0.5083798170089722, "learning_rate": 9.852717141143407e-05, "loss": 0.7099, "step": 1758 }, { "epoch": 0.3396408573083607, "grad_norm": 0.6691310405731201, "learning_rate": 9.852310894697002e-05, "loss": 0.7163, "step": 1759 }, { "epoch": 0.339833944776984, "grad_norm": 0.6923084259033203, "learning_rate": 9.851904097150913e-05, "loss": 0.6815, "step": 1760 }, { "epoch": 0.34002703224560726, "grad_norm": 0.7843384742736816, "learning_rate": 9.851496748551337e-05, "loss": 0.617, "step": 1761 }, { "epoch": 0.34022011971423055, "grad_norm": 2.582313299179077, "learning_rate": 9.851088848944546e-05, "loss": 0.714, "step": 1762 }, { "epoch": 0.34041320718285384, "grad_norm": 0.6152886748313904, "learning_rate": 9.850680398376861e-05, "loss": 0.703, "step": 1763 }, { "epoch": 0.3406062946514771, "grad_norm": 0.9455204010009766, "learning_rate": 9.850271396894674e-05, "loss": 0.6667, "step": 1764 }, { "epoch": 0.3407993821201004, "grad_norm": 0.5541189908981323, "learning_rate": 9.849861844544437e-05, "loss": 0.6052, "step": 1765 }, { "epoch": 0.3409924695887237, "grad_norm": 0.9880726933479309, "learning_rate": 9.849451741372663e-05, "loss": 0.7341, "step": 1766 }, { "epoch": 0.341185557057347, "grad_norm": 0.9382752180099487, "learning_rate": 9.849041087425934e-05, "loss": 0.7046, "step": 1767 }, { "epoch": 0.3413786445259703, "grad_norm": 1.091610312461853, "learning_rate": 9.848629882750886e-05, "loss": 0.649, "step": 1768 }, { "epoch": 0.34157173199459356, "grad_norm": 0.5764415860176086, "learning_rate": 9.848218127394222e-05, "loss": 0.6541, "step": 1769 }, { "epoch": 0.34176481946321685, "grad_norm": 1.5598621368408203, "learning_rate": 9.847805821402709e-05, "loss": 0.6874, "step": 1770 }, { "epoch": 0.34195790693184014, "grad_norm": 0.5335362553596497, "learning_rate": 9.84739296482317e-05, "loss": 0.6639, "step": 1771 }, { "epoch": 0.3421509944004634, "grad_norm": 0.5239335298538208, "learning_rate": 9.8469795577025e-05, "loss": 0.6631, "step": 1772 }, { "epoch": 0.3423440818690867, "grad_norm": 0.6709922552108765, "learning_rate": 9.84656560008765e-05, "loss": 0.6409, "step": 1773 }, { "epoch": 0.34253716933771, "grad_norm": 0.9202570915222168, "learning_rate": 9.846151092025636e-05, "loss": 0.6678, "step": 1774 }, { "epoch": 0.3427302568063333, "grad_norm": 0.6687533855438232, "learning_rate": 9.845736033563531e-05, "loss": 0.6142, "step": 1775 }, { "epoch": 0.3429233442749566, "grad_norm": 0.6401404142379761, "learning_rate": 9.845320424748478e-05, "loss": 0.6735, "step": 1776 }, { "epoch": 0.34311643174357986, "grad_norm": 2.047651767730713, "learning_rate": 9.844904265627683e-05, "loss": 0.6573, "step": 1777 }, { "epoch": 0.34330951921220315, "grad_norm": 0.8848451972007751, "learning_rate": 9.844487556248407e-05, "loss": 0.7286, "step": 1778 }, { "epoch": 0.34350260668082644, "grad_norm": 0.5822436809539795, "learning_rate": 9.844070296657979e-05, "loss": 0.7247, "step": 1779 }, { "epoch": 0.3436956941494497, "grad_norm": 0.7175307273864746, "learning_rate": 9.84365248690379e-05, "loss": 0.6832, "step": 1780 }, { "epoch": 0.343888781618073, "grad_norm": 2.0568370819091797, "learning_rate": 9.84323412703329e-05, "loss": 0.7077, "step": 1781 }, { "epoch": 0.34408186908669625, "grad_norm": 0.5528221726417542, "learning_rate": 9.842815217093995e-05, "loss": 0.6407, "step": 1782 }, { "epoch": 0.34427495655531953, "grad_norm": 0.727939784526825, "learning_rate": 9.842395757133486e-05, "loss": 0.6038, "step": 1783 }, { "epoch": 0.3444680440239428, "grad_norm": 0.6129341721534729, "learning_rate": 9.841975747199399e-05, "loss": 0.6846, "step": 1784 }, { "epoch": 0.3446611314925661, "grad_norm": 0.6059108376502991, "learning_rate": 9.841555187339437e-05, "loss": 0.6474, "step": 1785 }, { "epoch": 0.3448542189611894, "grad_norm": 0.757814884185791, "learning_rate": 9.841134077601367e-05, "loss": 0.685, "step": 1786 }, { "epoch": 0.3450473064298127, "grad_norm": 0.5750999450683594, "learning_rate": 9.840712418033013e-05, "loss": 0.6196, "step": 1787 }, { "epoch": 0.34524039389843597, "grad_norm": 0.7029934525489807, "learning_rate": 9.84029020868227e-05, "loss": 0.6624, "step": 1788 }, { "epoch": 0.34543348136705926, "grad_norm": 0.5740304589271545, "learning_rate": 9.839867449597084e-05, "loss": 0.6577, "step": 1789 }, { "epoch": 0.34562656883568255, "grad_norm": 0.5549677014350891, "learning_rate": 9.839444140825476e-05, "loss": 0.6341, "step": 1790 }, { "epoch": 0.34581965630430583, "grad_norm": 1.05604887008667, "learning_rate": 9.839020282415519e-05, "loss": 0.6848, "step": 1791 }, { "epoch": 0.3460127437729291, "grad_norm": 0.6285136342048645, "learning_rate": 9.838595874415354e-05, "loss": 0.6964, "step": 1792 }, { "epoch": 0.3462058312415524, "grad_norm": 0.8663459420204163, "learning_rate": 9.838170916873181e-05, "loss": 0.7132, "step": 1793 }, { "epoch": 0.3463989187101757, "grad_norm": 1.2521921396255493, "learning_rate": 9.837745409837268e-05, "loss": 0.6938, "step": 1794 }, { "epoch": 0.346592006178799, "grad_norm": 0.7441378831863403, "learning_rate": 9.83731935335594e-05, "loss": 0.6613, "step": 1795 }, { "epoch": 0.34678509364742227, "grad_norm": 0.5416942238807678, "learning_rate": 9.836892747477586e-05, "loss": 0.6808, "step": 1796 }, { "epoch": 0.34697818111604556, "grad_norm": 1.2253633737564087, "learning_rate": 9.836465592250659e-05, "loss": 0.6946, "step": 1797 }, { "epoch": 0.34717126858466885, "grad_norm": 0.6709808111190796, "learning_rate": 9.83603788772367e-05, "loss": 0.6645, "step": 1798 }, { "epoch": 0.34736435605329213, "grad_norm": 0.9192583560943604, "learning_rate": 9.835609633945199e-05, "loss": 0.7463, "step": 1799 }, { "epoch": 0.3475574435219154, "grad_norm": 0.8570939302444458, "learning_rate": 9.835180830963882e-05, "loss": 0.6965, "step": 1800 }, { "epoch": 0.3477505309905387, "grad_norm": 0.6405459046363831, "learning_rate": 9.834751478828421e-05, "loss": 0.5838, "step": 1801 }, { "epoch": 0.347943618459162, "grad_norm": 0.5664308071136475, "learning_rate": 9.834321577587582e-05, "loss": 0.6667, "step": 1802 }, { "epoch": 0.3481367059277853, "grad_norm": 0.9884966611862183, "learning_rate": 9.833891127290187e-05, "loss": 0.6407, "step": 1803 }, { "epoch": 0.34832979339640857, "grad_norm": 0.7636159658432007, "learning_rate": 9.833460127985125e-05, "loss": 0.6122, "step": 1804 }, { "epoch": 0.34852288086503186, "grad_norm": 0.5256569981575012, "learning_rate": 9.833028579721348e-05, "loss": 0.6673, "step": 1805 }, { "epoch": 0.34871596833365515, "grad_norm": 0.6083237528800964, "learning_rate": 9.83259648254787e-05, "loss": 0.6814, "step": 1806 }, { "epoch": 0.34890905580227843, "grad_norm": 0.8941516876220703, "learning_rate": 9.832163836513763e-05, "loss": 0.7059, "step": 1807 }, { "epoch": 0.3491021432709017, "grad_norm": 0.696498692035675, "learning_rate": 9.831730641668169e-05, "loss": 0.6709, "step": 1808 }, { "epoch": 0.349295230739525, "grad_norm": 0.7053379416465759, "learning_rate": 9.831296898060283e-05, "loss": 0.645, "step": 1809 }, { "epoch": 0.3494883182081483, "grad_norm": 0.7353898286819458, "learning_rate": 9.83086260573937e-05, "loss": 0.6546, "step": 1810 }, { "epoch": 0.3496814056767716, "grad_norm": 0.9265483617782593, "learning_rate": 9.830427764754754e-05, "loss": 0.6899, "step": 1811 }, { "epoch": 0.34987449314539487, "grad_norm": 0.7576671242713928, "learning_rate": 9.829992375155824e-05, "loss": 0.6859, "step": 1812 }, { "epoch": 0.35006758061401816, "grad_norm": 0.596615731716156, "learning_rate": 9.829556436992027e-05, "loss": 0.6937, "step": 1813 }, { "epoch": 0.35026066808264145, "grad_norm": 0.5819236636161804, "learning_rate": 9.829119950312875e-05, "loss": 0.6605, "step": 1814 }, { "epoch": 0.35045375555126473, "grad_norm": 0.9819434285163879, "learning_rate": 9.828682915167942e-05, "loss": 0.6479, "step": 1815 }, { "epoch": 0.350646843019888, "grad_norm": 0.6828585267066956, "learning_rate": 9.828245331606862e-05, "loss": 0.5632, "step": 1816 }, { "epoch": 0.3508399304885113, "grad_norm": 0.6255796551704407, "learning_rate": 9.827807199679337e-05, "loss": 0.7198, "step": 1817 }, { "epoch": 0.3510330179571346, "grad_norm": 0.609521746635437, "learning_rate": 9.827368519435128e-05, "loss": 0.5982, "step": 1818 }, { "epoch": 0.3512261054257579, "grad_norm": 0.6902616620063782, "learning_rate": 9.826929290924054e-05, "loss": 0.685, "step": 1819 }, { "epoch": 0.35141919289438117, "grad_norm": 0.5625342726707458, "learning_rate": 9.826489514196002e-05, "loss": 0.6912, "step": 1820 }, { "epoch": 0.35161228036300446, "grad_norm": 2.82374906539917, "learning_rate": 9.82604918930092e-05, "loss": 0.7284, "step": 1821 }, { "epoch": 0.35180536783162775, "grad_norm": 0.5098397135734558, "learning_rate": 9.825608316288819e-05, "loss": 0.593, "step": 1822 }, { "epoch": 0.35199845530025103, "grad_norm": 0.5573100447654724, "learning_rate": 9.825166895209767e-05, "loss": 0.7189, "step": 1823 }, { "epoch": 0.3521915427688743, "grad_norm": 0.6633525490760803, "learning_rate": 9.824724926113904e-05, "loss": 0.6565, "step": 1824 }, { "epoch": 0.3523846302374976, "grad_norm": 2.9567394256591797, "learning_rate": 9.824282409051422e-05, "loss": 0.6226, "step": 1825 }, { "epoch": 0.3525777177061209, "grad_norm": 0.5474629402160645, "learning_rate": 9.82383934407258e-05, "loss": 0.6742, "step": 1826 }, { "epoch": 0.3527708051747442, "grad_norm": 0.5510686635971069, "learning_rate": 9.823395731227701e-05, "loss": 0.6473, "step": 1827 }, { "epoch": 0.35296389264336747, "grad_norm": 0.5478686094284058, "learning_rate": 9.822951570567167e-05, "loss": 0.7085, "step": 1828 }, { "epoch": 0.35315698011199076, "grad_norm": 0.6830878853797913, "learning_rate": 9.822506862141426e-05, "loss": 0.6836, "step": 1829 }, { "epoch": 0.353350067580614, "grad_norm": 0.6631760597229004, "learning_rate": 9.82206160600098e-05, "loss": 0.6979, "step": 1830 }, { "epoch": 0.3535431550492373, "grad_norm": 0.6892408728599548, "learning_rate": 9.821615802196403e-05, "loss": 0.7357, "step": 1831 }, { "epoch": 0.35373624251786057, "grad_norm": 0.6806395649909973, "learning_rate": 9.821169450778327e-05, "loss": 0.6945, "step": 1832 }, { "epoch": 0.35392932998648385, "grad_norm": 0.6291873455047607, "learning_rate": 9.820722551797446e-05, "loss": 0.6171, "step": 1833 }, { "epoch": 0.35412241745510714, "grad_norm": 1.6594206094741821, "learning_rate": 9.820275105304513e-05, "loss": 0.7471, "step": 1834 }, { "epoch": 0.35431550492373043, "grad_norm": 0.5591785311698914, "learning_rate": 9.819827111350351e-05, "loss": 0.666, "step": 1835 }, { "epoch": 0.3545085923923537, "grad_norm": 0.6068512201309204, "learning_rate": 9.819378569985839e-05, "loss": 0.6939, "step": 1836 }, { "epoch": 0.354701679860977, "grad_norm": 1.1519393920898438, "learning_rate": 9.81892948126192e-05, "loss": 0.6185, "step": 1837 }, { "epoch": 0.3548947673296003, "grad_norm": 0.49560338258743286, "learning_rate": 9.8184798452296e-05, "loss": 0.6809, "step": 1838 }, { "epoch": 0.3550878547982236, "grad_norm": 0.5884016752243042, "learning_rate": 9.818029661939946e-05, "loss": 0.6759, "step": 1839 }, { "epoch": 0.35528094226684687, "grad_norm": 0.7332892417907715, "learning_rate": 9.817578931444086e-05, "loss": 0.7021, "step": 1840 }, { "epoch": 0.35547402973547015, "grad_norm": 0.714505672454834, "learning_rate": 9.817127653793213e-05, "loss": 0.6965, "step": 1841 }, { "epoch": 0.35566711720409344, "grad_norm": 0.531894326210022, "learning_rate": 9.816675829038579e-05, "loss": 0.6171, "step": 1842 }, { "epoch": 0.35586020467271673, "grad_norm": 0.718117356300354, "learning_rate": 9.816223457231501e-05, "loss": 0.7036, "step": 1843 }, { "epoch": 0.35605329214134, "grad_norm": 0.4753077030181885, "learning_rate": 9.815770538423358e-05, "loss": 0.682, "step": 1844 }, { "epoch": 0.3562463796099633, "grad_norm": 1.266327977180481, "learning_rate": 9.815317072665588e-05, "loss": 0.6161, "step": 1845 }, { "epoch": 0.3564394670785866, "grad_norm": 0.6140932440757751, "learning_rate": 9.814863060009696e-05, "loss": 0.6742, "step": 1846 }, { "epoch": 0.3566325545472099, "grad_norm": 0.7337337136268616, "learning_rate": 9.814408500507244e-05, "loss": 0.6183, "step": 1847 }, { "epoch": 0.35682564201583317, "grad_norm": 0.5344495177268982, "learning_rate": 9.813953394209859e-05, "loss": 0.7736, "step": 1848 }, { "epoch": 0.35701872948445645, "grad_norm": 0.8335717916488647, "learning_rate": 9.813497741169229e-05, "loss": 0.6572, "step": 1849 }, { "epoch": 0.35721181695307974, "grad_norm": 1.0278196334838867, "learning_rate": 9.813041541437107e-05, "loss": 0.6576, "step": 1850 }, { "epoch": 0.35740490442170303, "grad_norm": 0.6582840085029602, "learning_rate": 9.812584795065303e-05, "loss": 0.6584, "step": 1851 }, { "epoch": 0.3575979918903263, "grad_norm": 1.3846503496170044, "learning_rate": 9.812127502105693e-05, "loss": 0.6881, "step": 1852 }, { "epoch": 0.3577910793589496, "grad_norm": 0.8441597819328308, "learning_rate": 9.811669662610215e-05, "loss": 0.6368, "step": 1853 }, { "epoch": 0.3579841668275729, "grad_norm": 0.541083574295044, "learning_rate": 9.811211276630865e-05, "loss": 0.6732, "step": 1854 }, { "epoch": 0.3581772542961962, "grad_norm": 0.855152428150177, "learning_rate": 9.810752344219707e-05, "loss": 0.6326, "step": 1855 }, { "epoch": 0.35837034176481947, "grad_norm": 0.766482949256897, "learning_rate": 9.810292865428863e-05, "loss": 0.705, "step": 1856 }, { "epoch": 0.35856342923344275, "grad_norm": 0.4736563563346863, "learning_rate": 9.809832840310517e-05, "loss": 0.7058, "step": 1857 }, { "epoch": 0.35875651670206604, "grad_norm": 0.611826479434967, "learning_rate": 9.80937226891692e-05, "loss": 0.6886, "step": 1858 }, { "epoch": 0.35894960417068933, "grad_norm": 4.295565605163574, "learning_rate": 9.808911151300375e-05, "loss": 0.6938, "step": 1859 }, { "epoch": 0.3591426916393126, "grad_norm": 1.6129732131958008, "learning_rate": 9.808449487513259e-05, "loss": 0.7749, "step": 1860 }, { "epoch": 0.3593357791079359, "grad_norm": 0.8034874796867371, "learning_rate": 9.807987277608004e-05, "loss": 0.6772, "step": 1861 }, { "epoch": 0.3595288665765592, "grad_norm": 0.5689219236373901, "learning_rate": 9.807524521637102e-05, "loss": 0.6412, "step": 1862 }, { "epoch": 0.3597219540451825, "grad_norm": 0.6015222072601318, "learning_rate": 9.807061219653115e-05, "loss": 0.6906, "step": 1863 }, { "epoch": 0.35991504151380577, "grad_norm": 0.678435742855072, "learning_rate": 9.806597371708659e-05, "loss": 0.7228, "step": 1864 }, { "epoch": 0.36010812898242905, "grad_norm": 0.8032293915748596, "learning_rate": 9.806132977856418e-05, "loss": 0.6404, "step": 1865 }, { "epoch": 0.36030121645105234, "grad_norm": 1.036181926727295, "learning_rate": 9.805668038149134e-05, "loss": 0.6857, "step": 1866 }, { "epoch": 0.36049430391967563, "grad_norm": 1.1367930173873901, "learning_rate": 9.805202552639612e-05, "loss": 0.6589, "step": 1867 }, { "epoch": 0.3606873913882989, "grad_norm": 0.635886549949646, "learning_rate": 9.80473652138072e-05, "loss": 0.6973, "step": 1868 }, { "epoch": 0.3608804788569222, "grad_norm": 0.7950571775436401, "learning_rate": 9.804269944425387e-05, "loss": 0.6015, "step": 1869 }, { "epoch": 0.3610735663255455, "grad_norm": 0.7119146585464478, "learning_rate": 9.803802821826603e-05, "loss": 0.6869, "step": 1870 }, { "epoch": 0.3612666537941688, "grad_norm": 0.6382442116737366, "learning_rate": 9.803335153637424e-05, "loss": 0.7087, "step": 1871 }, { "epoch": 0.36145974126279207, "grad_norm": 0.6798009872436523, "learning_rate": 9.802866939910965e-05, "loss": 0.6791, "step": 1872 }, { "epoch": 0.36165282873141535, "grad_norm": 0.8318861126899719, "learning_rate": 9.8023981807004e-05, "loss": 0.7373, "step": 1873 }, { "epoch": 0.36184591620003864, "grad_norm": 0.5133716464042664, "learning_rate": 9.801928876058972e-05, "loss": 0.6922, "step": 1874 }, { "epoch": 0.36203900366866193, "grad_norm": 0.8459396362304688, "learning_rate": 9.80145902603998e-05, "loss": 0.713, "step": 1875 }, { "epoch": 0.3622320911372852, "grad_norm": 0.5792282223701477, "learning_rate": 9.800988630696788e-05, "loss": 0.6441, "step": 1876 }, { "epoch": 0.36242517860590845, "grad_norm": 0.5341281294822693, "learning_rate": 9.800517690082822e-05, "loss": 0.6198, "step": 1877 }, { "epoch": 0.36261826607453174, "grad_norm": 0.6323694586753845, "learning_rate": 9.800046204251567e-05, "loss": 0.6173, "step": 1878 }, { "epoch": 0.362811353543155, "grad_norm": 0.6528891921043396, "learning_rate": 9.799574173256572e-05, "loss": 0.6437, "step": 1879 }, { "epoch": 0.3630044410117783, "grad_norm": 0.8434657454490662, "learning_rate": 9.799101597151448e-05, "loss": 0.6708, "step": 1880 }, { "epoch": 0.3631975284804016, "grad_norm": 0.6357846260070801, "learning_rate": 9.798628475989868e-05, "loss": 0.6544, "step": 1881 }, { "epoch": 0.3633906159490249, "grad_norm": 1.0989017486572266, "learning_rate": 9.798154809825566e-05, "loss": 0.6749, "step": 1882 }, { "epoch": 0.3635837034176482, "grad_norm": 0.6988533139228821, "learning_rate": 9.79768059871234e-05, "loss": 0.6858, "step": 1883 }, { "epoch": 0.36377679088627146, "grad_norm": 0.9442137479782104, "learning_rate": 9.797205842704047e-05, "loss": 0.6818, "step": 1884 }, { "epoch": 0.36396987835489475, "grad_norm": 0.8000372648239136, "learning_rate": 9.796730541854606e-05, "loss": 0.6507, "step": 1885 }, { "epoch": 0.36416296582351804, "grad_norm": 0.5941499471664429, "learning_rate": 9.796254696218004e-05, "loss": 0.6816, "step": 1886 }, { "epoch": 0.3643560532921413, "grad_norm": 0.608208417892456, "learning_rate": 9.79577830584828e-05, "loss": 0.6702, "step": 1887 }, { "epoch": 0.3645491407607646, "grad_norm": 0.9971293807029724, "learning_rate": 9.795301370799541e-05, "loss": 0.7118, "step": 1888 }, { "epoch": 0.3647422282293879, "grad_norm": 0.8670103549957275, "learning_rate": 9.794823891125955e-05, "loss": 0.7031, "step": 1889 }, { "epoch": 0.3649353156980112, "grad_norm": 0.9122008681297302, "learning_rate": 9.794345866881755e-05, "loss": 0.6299, "step": 1890 }, { "epoch": 0.3651284031666345, "grad_norm": 0.5488423109054565, "learning_rate": 9.793867298121227e-05, "loss": 0.6543, "step": 1891 }, { "epoch": 0.36532149063525776, "grad_norm": 1.9786765575408936, "learning_rate": 9.793388184898728e-05, "loss": 0.6012, "step": 1892 }, { "epoch": 0.36551457810388105, "grad_norm": 0.6002109050750732, "learning_rate": 9.792908527268672e-05, "loss": 0.6532, "step": 1893 }, { "epoch": 0.36570766557250434, "grad_norm": 0.8689890503883362, "learning_rate": 9.792428325285534e-05, "loss": 0.7057, "step": 1894 }, { "epoch": 0.3659007530411276, "grad_norm": 0.490351140499115, "learning_rate": 9.791947579003856e-05, "loss": 0.6443, "step": 1895 }, { "epoch": 0.3660938405097509, "grad_norm": 0.7487685680389404, "learning_rate": 9.791466288478237e-05, "loss": 0.7089, "step": 1896 }, { "epoch": 0.3662869279783742, "grad_norm": 0.8080576062202454, "learning_rate": 9.79098445376334e-05, "loss": 0.6961, "step": 1897 }, { "epoch": 0.3664800154469975, "grad_norm": 0.7748355269432068, "learning_rate": 9.790502074913889e-05, "loss": 0.7102, "step": 1898 }, { "epoch": 0.3666731029156208, "grad_norm": 1.3546241521835327, "learning_rate": 9.79001915198467e-05, "loss": 0.7113, "step": 1899 }, { "epoch": 0.36686619038424406, "grad_norm": 0.5970446467399597, "learning_rate": 9.789535685030531e-05, "loss": 0.6227, "step": 1900 }, { "epoch": 0.36705927785286735, "grad_norm": 0.5386084318161011, "learning_rate": 9.78905167410638e-05, "loss": 0.6971, "step": 1901 }, { "epoch": 0.36725236532149064, "grad_norm": 1.6344592571258545, "learning_rate": 9.788567119267192e-05, "loss": 0.6374, "step": 1902 }, { "epoch": 0.3674454527901139, "grad_norm": 0.6520715951919556, "learning_rate": 9.788082020567995e-05, "loss": 0.6629, "step": 1903 }, { "epoch": 0.3676385402587372, "grad_norm": 0.7538492679595947, "learning_rate": 9.78759637806389e-05, "loss": 0.7467, "step": 1904 }, { "epoch": 0.3678316277273605, "grad_norm": 0.8408263921737671, "learning_rate": 9.787110191810027e-05, "loss": 0.702, "step": 1905 }, { "epoch": 0.3680247151959838, "grad_norm": 0.645703136920929, "learning_rate": 9.78662346186163e-05, "loss": 0.6848, "step": 1906 }, { "epoch": 0.3682178026646071, "grad_norm": 0.9063574075698853, "learning_rate": 9.786136188273977e-05, "loss": 0.7152, "step": 1907 }, { "epoch": 0.36841089013323036, "grad_norm": 0.620022714138031, "learning_rate": 9.785648371102412e-05, "loss": 0.6379, "step": 1908 }, { "epoch": 0.36860397760185365, "grad_norm": 0.6106616258621216, "learning_rate": 9.785160010402334e-05, "loss": 0.608, "step": 1909 }, { "epoch": 0.36879706507047694, "grad_norm": 0.6825859546661377, "learning_rate": 9.784671106229213e-05, "loss": 0.6917, "step": 1910 }, { "epoch": 0.3689901525391002, "grad_norm": 0.733447253704071, "learning_rate": 9.784181658638574e-05, "loss": 0.7097, "step": 1911 }, { "epoch": 0.3691832400077235, "grad_norm": 0.5581374168395996, "learning_rate": 9.783691667686006e-05, "loss": 0.6261, "step": 1912 }, { "epoch": 0.3693763274763468, "grad_norm": 1.3272960186004639, "learning_rate": 9.78320113342716e-05, "loss": 0.7271, "step": 1913 }, { "epoch": 0.3695694149449701, "grad_norm": 0.6329229474067688, "learning_rate": 9.782710055917748e-05, "loss": 0.6922, "step": 1914 }, { "epoch": 0.3697625024135934, "grad_norm": 0.6220424175262451, "learning_rate": 9.782218435213543e-05, "loss": 0.6498, "step": 1915 }, { "epoch": 0.36995558988221666, "grad_norm": 1.1218366622924805, "learning_rate": 9.781726271370383e-05, "loss": 0.6391, "step": 1916 }, { "epoch": 0.37014867735083995, "grad_norm": 0.949354887008667, "learning_rate": 9.781233564444165e-05, "loss": 0.6655, "step": 1917 }, { "epoch": 0.37034176481946324, "grad_norm": 0.8553543090820312, "learning_rate": 9.780740314490845e-05, "loss": 0.6662, "step": 1918 }, { "epoch": 0.3705348522880865, "grad_norm": 0.6082737445831299, "learning_rate": 9.780246521566448e-05, "loss": 0.6566, "step": 1919 }, { "epoch": 0.3707279397567098, "grad_norm": 0.7921109199523926, "learning_rate": 9.779752185727053e-05, "loss": 0.7233, "step": 1920 }, { "epoch": 0.3709210272253331, "grad_norm": 0.7472334504127502, "learning_rate": 9.779257307028804e-05, "loss": 0.6791, "step": 1921 }, { "epoch": 0.3711141146939564, "grad_norm": 0.706367552280426, "learning_rate": 9.778761885527909e-05, "loss": 0.6534, "step": 1922 }, { "epoch": 0.3713072021625797, "grad_norm": 0.9292275309562683, "learning_rate": 9.778265921280635e-05, "loss": 0.6946, "step": 1923 }, { "epoch": 0.37150028963120296, "grad_norm": 0.6396751999855042, "learning_rate": 9.777769414343309e-05, "loss": 0.6882, "step": 1924 }, { "epoch": 0.3716933770998262, "grad_norm": 1.1057523488998413, "learning_rate": 9.777272364772324e-05, "loss": 0.6446, "step": 1925 }, { "epoch": 0.3718864645684495, "grad_norm": 0.5791231393814087, "learning_rate": 9.77677477262413e-05, "loss": 0.5864, "step": 1926 }, { "epoch": 0.37207955203707277, "grad_norm": 0.6764203310012817, "learning_rate": 9.776276637955243e-05, "loss": 0.7109, "step": 1927 }, { "epoch": 0.37227263950569606, "grad_norm": 0.640094518661499, "learning_rate": 9.775777960822237e-05, "loss": 0.6675, "step": 1928 }, { "epoch": 0.37246572697431934, "grad_norm": 1.845334529876709, "learning_rate": 9.775278741281751e-05, "loss": 0.6317, "step": 1929 }, { "epoch": 0.37265881444294263, "grad_norm": 0.5916251540184021, "learning_rate": 9.77477897939048e-05, "loss": 0.6568, "step": 1930 }, { "epoch": 0.3728519019115659, "grad_norm": 0.6680988669395447, "learning_rate": 9.774278675205188e-05, "loss": 0.6922, "step": 1931 }, { "epoch": 0.3730449893801892, "grad_norm": 0.7580824494361877, "learning_rate": 9.773777828782696e-05, "loss": 0.6775, "step": 1932 }, { "epoch": 0.3732380768488125, "grad_norm": 0.6506631374359131, "learning_rate": 9.773276440179885e-05, "loss": 0.7325, "step": 1933 }, { "epoch": 0.3734311643174358, "grad_norm": 0.8236773014068604, "learning_rate": 9.772774509453702e-05, "loss": 0.6301, "step": 1934 }, { "epoch": 0.37362425178605907, "grad_norm": 0.763280987739563, "learning_rate": 9.772272036661155e-05, "loss": 0.6046, "step": 1935 }, { "epoch": 0.37381733925468236, "grad_norm": 0.9129140973091125, "learning_rate": 9.771769021859312e-05, "loss": 0.7191, "step": 1936 }, { "epoch": 0.37401042672330564, "grad_norm": 0.5741595029830933, "learning_rate": 9.771265465105299e-05, "loss": 0.6668, "step": 1937 }, { "epoch": 0.37420351419192893, "grad_norm": 0.6385373473167419, "learning_rate": 9.770761366456311e-05, "loss": 0.6853, "step": 1938 }, { "epoch": 0.3743966016605522, "grad_norm": 0.999927818775177, "learning_rate": 9.7702567259696e-05, "loss": 0.6553, "step": 1939 }, { "epoch": 0.3745896891291755, "grad_norm": 5.931580543518066, "learning_rate": 9.769751543702479e-05, "loss": 0.6173, "step": 1940 }, { "epoch": 0.3747827765977988, "grad_norm": 0.6657626628875732, "learning_rate": 9.769245819712325e-05, "loss": 0.6232, "step": 1941 }, { "epoch": 0.3749758640664221, "grad_norm": 0.6442587375640869, "learning_rate": 9.768739554056576e-05, "loss": 0.719, "step": 1942 }, { "epoch": 0.37516895153504537, "grad_norm": 0.5471697449684143, "learning_rate": 9.768232746792732e-05, "loss": 0.6488, "step": 1943 }, { "epoch": 0.37536203900366866, "grad_norm": 1.8588284254074097, "learning_rate": 9.767725397978349e-05, "loss": 0.7037, "step": 1944 }, { "epoch": 0.37555512647229194, "grad_norm": 1.1184749603271484, "learning_rate": 9.767217507671053e-05, "loss": 0.7191, "step": 1945 }, { "epoch": 0.37574821394091523, "grad_norm": 0.6714403629302979, "learning_rate": 9.766709075928527e-05, "loss": 0.6262, "step": 1946 }, { "epoch": 0.3759413014095385, "grad_norm": 0.6078478693962097, "learning_rate": 9.766200102808516e-05, "loss": 0.6968, "step": 1947 }, { "epoch": 0.3761343888781618, "grad_norm": 0.570082426071167, "learning_rate": 9.765690588368824e-05, "loss": 0.6533, "step": 1948 }, { "epoch": 0.3763274763467851, "grad_norm": 0.557731568813324, "learning_rate": 9.765180532667322e-05, "loss": 0.6105, "step": 1949 }, { "epoch": 0.3765205638154084, "grad_norm": 0.6939077377319336, "learning_rate": 9.764669935761938e-05, "loss": 0.6352, "step": 1950 }, { "epoch": 0.37671365128403167, "grad_norm": 0.6368036866188049, "learning_rate": 9.764158797710663e-05, "loss": 0.7011, "step": 1951 }, { "epoch": 0.37690673875265496, "grad_norm": 0.7410091757774353, "learning_rate": 9.763647118571548e-05, "loss": 0.6277, "step": 1952 }, { "epoch": 0.37709982622127824, "grad_norm": 0.5275882482528687, "learning_rate": 9.763134898402709e-05, "loss": 0.7029, "step": 1953 }, { "epoch": 0.37729291368990153, "grad_norm": 1.7595189809799194, "learning_rate": 9.762622137262321e-05, "loss": 0.7555, "step": 1954 }, { "epoch": 0.3774860011585248, "grad_norm": 0.9702709317207336, "learning_rate": 9.762108835208619e-05, "loss": 0.6309, "step": 1955 }, { "epoch": 0.3776790886271481, "grad_norm": 0.7913571000099182, "learning_rate": 9.761594992299905e-05, "loss": 0.6554, "step": 1956 }, { "epoch": 0.3778721760957714, "grad_norm": 0.8357893824577332, "learning_rate": 9.761080608594533e-05, "loss": 0.753, "step": 1957 }, { "epoch": 0.3780652635643947, "grad_norm": 1.1051640510559082, "learning_rate": 9.760565684150927e-05, "loss": 0.6671, "step": 1958 }, { "epoch": 0.37825835103301797, "grad_norm": 0.6922770738601685, "learning_rate": 9.76005021902757e-05, "loss": 0.68, "step": 1959 }, { "epoch": 0.37845143850164126, "grad_norm": 0.5809647440910339, "learning_rate": 9.759534213283005e-05, "loss": 0.6387, "step": 1960 }, { "epoch": 0.37864452597026454, "grad_norm": 0.6997209191322327, "learning_rate": 9.759017666975836e-05, "loss": 0.714, "step": 1961 }, { "epoch": 0.37883761343888783, "grad_norm": 0.6086176633834839, "learning_rate": 9.758500580164731e-05, "loss": 0.6932, "step": 1962 }, { "epoch": 0.3790307009075111, "grad_norm": 1.1098549365997314, "learning_rate": 9.757982952908419e-05, "loss": 0.6786, "step": 1963 }, { "epoch": 0.3792237883761344, "grad_norm": 0.8288421630859375, "learning_rate": 9.757464785265687e-05, "loss": 0.6185, "step": 1964 }, { "epoch": 0.3794168758447577, "grad_norm": 0.9668926000595093, "learning_rate": 9.756946077295386e-05, "loss": 0.6741, "step": 1965 }, { "epoch": 0.379609963313381, "grad_norm": 0.7834420204162598, "learning_rate": 9.756426829056431e-05, "loss": 0.6836, "step": 1966 }, { "epoch": 0.37980305078200427, "grad_norm": 0.8341977000236511, "learning_rate": 9.755907040607792e-05, "loss": 0.6337, "step": 1967 }, { "epoch": 0.37999613825062756, "grad_norm": 1.0851131677627563, "learning_rate": 9.755386712008504e-05, "loss": 0.6887, "step": 1968 }, { "epoch": 0.38018922571925085, "grad_norm": 1.4804435968399048, "learning_rate": 9.754865843317666e-05, "loss": 0.7181, "step": 1969 }, { "epoch": 0.38038231318787413, "grad_norm": 0.6066175103187561, "learning_rate": 9.754344434594433e-05, "loss": 0.7165, "step": 1970 }, { "epoch": 0.3805754006564974, "grad_norm": 0.4885490834712982, "learning_rate": 9.753822485898023e-05, "loss": 0.578, "step": 1971 }, { "epoch": 0.38076848812512065, "grad_norm": 0.6382504105567932, "learning_rate": 9.753299997287721e-05, "loss": 0.671, "step": 1972 }, { "epoch": 0.38096157559374394, "grad_norm": 0.8185518383979797, "learning_rate": 9.752776968822863e-05, "loss": 0.6388, "step": 1973 }, { "epoch": 0.3811546630623672, "grad_norm": 0.6081629991531372, "learning_rate": 9.752253400562854e-05, "loss": 0.6893, "step": 1974 }, { "epoch": 0.3813477505309905, "grad_norm": 0.6406849026679993, "learning_rate": 9.75172929256716e-05, "loss": 0.6109, "step": 1975 }, { "epoch": 0.3815408379996138, "grad_norm": 0.49156874418258667, "learning_rate": 9.751204644895303e-05, "loss": 0.6658, "step": 1976 }, { "epoch": 0.3817339254682371, "grad_norm": 0.7030184268951416, "learning_rate": 9.75067945760687e-05, "loss": 0.6671, "step": 1977 }, { "epoch": 0.3819270129368604, "grad_norm": 0.5509622693061829, "learning_rate": 9.750153730761514e-05, "loss": 0.6297, "step": 1978 }, { "epoch": 0.38212010040548366, "grad_norm": 0.6154593229293823, "learning_rate": 9.749627464418937e-05, "loss": 0.665, "step": 1979 }, { "epoch": 0.38231318787410695, "grad_norm": 0.7645459175109863, "learning_rate": 9.749100658638914e-05, "loss": 0.6786, "step": 1980 }, { "epoch": 0.38250627534273024, "grad_norm": 0.9292107820510864, "learning_rate": 9.748573313481275e-05, "loss": 0.6183, "step": 1981 }, { "epoch": 0.3826993628113535, "grad_norm": 0.6786863207817078, "learning_rate": 9.748045429005915e-05, "loss": 0.6946, "step": 1982 }, { "epoch": 0.3828924502799768, "grad_norm": 0.8364476561546326, "learning_rate": 9.747517005272786e-05, "loss": 0.6522, "step": 1983 }, { "epoch": 0.3830855377486001, "grad_norm": 0.68668532371521, "learning_rate": 9.746988042341906e-05, "loss": 0.6724, "step": 1984 }, { "epoch": 0.3832786252172234, "grad_norm": 0.6321919560432434, "learning_rate": 9.74645854027335e-05, "loss": 0.6436, "step": 1985 }, { "epoch": 0.3834717126858467, "grad_norm": 1.3858685493469238, "learning_rate": 9.745928499127257e-05, "loss": 0.6397, "step": 1986 }, { "epoch": 0.38366480015446996, "grad_norm": 0.8787078857421875, "learning_rate": 9.745397918963826e-05, "loss": 0.6144, "step": 1987 }, { "epoch": 0.38385788762309325, "grad_norm": 0.7609953284263611, "learning_rate": 9.744866799843317e-05, "loss": 0.6639, "step": 1988 }, { "epoch": 0.38405097509171654, "grad_norm": 0.5662683844566345, "learning_rate": 9.744335141826052e-05, "loss": 0.5895, "step": 1989 }, { "epoch": 0.3842440625603398, "grad_norm": 0.9906890392303467, "learning_rate": 9.743802944972414e-05, "loss": 0.64, "step": 1990 }, { "epoch": 0.3844371500289631, "grad_norm": 0.8037384152412415, "learning_rate": 9.743270209342846e-05, "loss": 0.6632, "step": 1991 }, { "epoch": 0.3846302374975864, "grad_norm": 0.9726057052612305, "learning_rate": 9.742736934997855e-05, "loss": 0.6721, "step": 1992 }, { "epoch": 0.3848233249662097, "grad_norm": 0.6296117305755615, "learning_rate": 9.742203121998007e-05, "loss": 0.6521, "step": 1993 }, { "epoch": 0.385016412434833, "grad_norm": 2.146846294403076, "learning_rate": 9.74166877040393e-05, "loss": 0.7387, "step": 1994 }, { "epoch": 0.38520949990345627, "grad_norm": 1.1017601490020752, "learning_rate": 9.74113388027631e-05, "loss": 0.6533, "step": 1995 }, { "epoch": 0.38540258737207955, "grad_norm": 1.1163285970687866, "learning_rate": 9.740598451675899e-05, "loss": 0.6971, "step": 1996 }, { "epoch": 0.38559567484070284, "grad_norm": 1.0497568845748901, "learning_rate": 9.740062484663511e-05, "loss": 0.7466, "step": 1997 }, { "epoch": 0.3857887623093261, "grad_norm": 2.426905393600464, "learning_rate": 9.739525979300014e-05, "loss": 0.6788, "step": 1998 }, { "epoch": 0.3859818497779494, "grad_norm": 1.0539937019348145, "learning_rate": 9.738988935646343e-05, "loss": 0.6153, "step": 1999 }, { "epoch": 0.3861749372465727, "grad_norm": 0.6937062740325928, "learning_rate": 9.738451353763493e-05, "loss": 0.6379, "step": 2000 }, { "epoch": 0.3861749372465727, "eval_loss": 0.7134917974472046, "eval_runtime": 49.5945, "eval_samples_per_second": 13.389, "eval_steps_per_second": 0.423, "step": 2000 }, { "epoch": 0.386368024715196, "grad_norm": 0.5138224363327026, "learning_rate": 9.737913233712519e-05, "loss": 0.6588, "step": 2001 }, { "epoch": 0.3865611121838193, "grad_norm": 1.8367615938186646, "learning_rate": 9.737374575554536e-05, "loss": 0.6508, "step": 2002 }, { "epoch": 0.38675419965244257, "grad_norm": 1.1340997219085693, "learning_rate": 9.736835379350727e-05, "loss": 0.7254, "step": 2003 }, { "epoch": 0.38694728712106585, "grad_norm": 1.239969253540039, "learning_rate": 9.736295645162327e-05, "loss": 0.6672, "step": 2004 }, { "epoch": 0.38714037458968914, "grad_norm": 1.1792171001434326, "learning_rate": 9.735755373050636e-05, "loss": 0.669, "step": 2005 }, { "epoch": 0.3873334620583124, "grad_norm": 2.392179250717163, "learning_rate": 9.735214563077019e-05, "loss": 0.6716, "step": 2006 }, { "epoch": 0.3875265495269357, "grad_norm": 0.9812596440315247, "learning_rate": 9.734673215302894e-05, "loss": 0.7645, "step": 2007 }, { "epoch": 0.387719636995559, "grad_norm": 0.7820072770118713, "learning_rate": 9.734131329789746e-05, "loss": 0.6485, "step": 2008 }, { "epoch": 0.3879127244641823, "grad_norm": 1.1823099851608276, "learning_rate": 9.73358890659912e-05, "loss": 0.6893, "step": 2009 }, { "epoch": 0.3881058119328056, "grad_norm": 0.7622960805892944, "learning_rate": 9.733045945792622e-05, "loss": 0.5676, "step": 2010 }, { "epoch": 0.38829889940142887, "grad_norm": 0.7072319388389587, "learning_rate": 9.732502447431917e-05, "loss": 0.6927, "step": 2011 }, { "epoch": 0.38849198687005215, "grad_norm": 1.0730304718017578, "learning_rate": 9.731958411578734e-05, "loss": 0.6794, "step": 2012 }, { "epoch": 0.38868507433867544, "grad_norm": 0.6131848692893982, "learning_rate": 9.73141383829486e-05, "loss": 0.6738, "step": 2013 }, { "epoch": 0.38887816180729873, "grad_norm": 0.5214424729347229, "learning_rate": 9.730868727642147e-05, "loss": 0.6877, "step": 2014 }, { "epoch": 0.389071249275922, "grad_norm": 0.9376972913742065, "learning_rate": 9.730323079682502e-05, "loss": 0.6307, "step": 2015 }, { "epoch": 0.3892643367445453, "grad_norm": 1.2275638580322266, "learning_rate": 9.729776894477903e-05, "loss": 0.7248, "step": 2016 }, { "epoch": 0.3894574242131686, "grad_norm": 0.8265889286994934, "learning_rate": 9.729230172090378e-05, "loss": 0.6295, "step": 2017 }, { "epoch": 0.3896505116817919, "grad_norm": 1.085730791091919, "learning_rate": 9.728682912582021e-05, "loss": 0.6554, "step": 2018 }, { "epoch": 0.38984359915041517, "grad_norm": 0.8170695304870605, "learning_rate": 9.728135116014989e-05, "loss": 0.6474, "step": 2019 }, { "epoch": 0.3900366866190384, "grad_norm": 0.6733046174049377, "learning_rate": 9.727586782451496e-05, "loss": 0.6325, "step": 2020 }, { "epoch": 0.3902297740876617, "grad_norm": 1.1665005683898926, "learning_rate": 9.727037911953821e-05, "loss": 0.6685, "step": 2021 }, { "epoch": 0.390422861556285, "grad_norm": 1.0426594018936157, "learning_rate": 9.726488504584296e-05, "loss": 0.6394, "step": 2022 }, { "epoch": 0.39061594902490826, "grad_norm": 0.6103055477142334, "learning_rate": 9.725938560405328e-05, "loss": 0.6627, "step": 2023 }, { "epoch": 0.39080903649353155, "grad_norm": 0.9538660049438477, "learning_rate": 9.725388079479373e-05, "loss": 0.6351, "step": 2024 }, { "epoch": 0.39100212396215484, "grad_norm": 0.9036869406700134, "learning_rate": 9.72483706186895e-05, "loss": 0.6119, "step": 2025 }, { "epoch": 0.3911952114307781, "grad_norm": 0.6326113939285278, "learning_rate": 9.724285507636643e-05, "loss": 0.7387, "step": 2026 }, { "epoch": 0.3913882988994014, "grad_norm": 0.9695148468017578, "learning_rate": 9.723733416845093e-05, "loss": 0.6491, "step": 2027 }, { "epoch": 0.3915813863680247, "grad_norm": 0.7129656076431274, "learning_rate": 9.723180789557005e-05, "loss": 0.6274, "step": 2028 }, { "epoch": 0.391774473836648, "grad_norm": 0.9028733968734741, "learning_rate": 9.722627625835143e-05, "loss": 0.707, "step": 2029 }, { "epoch": 0.3919675613052713, "grad_norm": 0.8408252596855164, "learning_rate": 9.722073925742332e-05, "loss": 0.6837, "step": 2030 }, { "epoch": 0.39216064877389456, "grad_norm": 0.7530283331871033, "learning_rate": 9.721519689341459e-05, "loss": 0.6241, "step": 2031 }, { "epoch": 0.39235373624251785, "grad_norm": 0.9310856461524963, "learning_rate": 9.72096491669547e-05, "loss": 0.6163, "step": 2032 }, { "epoch": 0.39254682371114114, "grad_norm": 1.0483717918395996, "learning_rate": 9.720409607867373e-05, "loss": 0.6616, "step": 2033 }, { "epoch": 0.3927399111797644, "grad_norm": 1.3022147417068481, "learning_rate": 9.719853762920239e-05, "loss": 0.7245, "step": 2034 }, { "epoch": 0.3929329986483877, "grad_norm": 0.946922242641449, "learning_rate": 9.719297381917196e-05, "loss": 0.6746, "step": 2035 }, { "epoch": 0.393126086117011, "grad_norm": 0.9742671847343445, "learning_rate": 9.718740464921437e-05, "loss": 0.6423, "step": 2036 }, { "epoch": 0.3933191735856343, "grad_norm": 0.6125755906105042, "learning_rate": 9.718183011996211e-05, "loss": 0.7276, "step": 2037 }, { "epoch": 0.3935122610542576, "grad_norm": 0.9616541266441345, "learning_rate": 9.717625023204832e-05, "loss": 0.6913, "step": 2038 }, { "epoch": 0.39370534852288086, "grad_norm": 2.050795316696167, "learning_rate": 9.717066498610673e-05, "loss": 0.6897, "step": 2039 }, { "epoch": 0.39389843599150415, "grad_norm": 2.9853978157043457, "learning_rate": 9.716507438277169e-05, "loss": 0.7391, "step": 2040 }, { "epoch": 0.39409152346012744, "grad_norm": 0.6971909999847412, "learning_rate": 9.715947842267814e-05, "loss": 0.6999, "step": 2041 }, { "epoch": 0.3942846109287507, "grad_norm": 0.838578999042511, "learning_rate": 9.715387710646165e-05, "loss": 0.7027, "step": 2042 }, { "epoch": 0.394477698397374, "grad_norm": 1.8661590814590454, "learning_rate": 9.714827043475838e-05, "loss": 0.6523, "step": 2043 }, { "epoch": 0.3946707858659973, "grad_norm": 0.977450966835022, "learning_rate": 9.714265840820511e-05, "loss": 0.6531, "step": 2044 }, { "epoch": 0.3948638733346206, "grad_norm": 1.0662957429885864, "learning_rate": 9.713704102743922e-05, "loss": 0.7079, "step": 2045 }, { "epoch": 0.3950569608032439, "grad_norm": 1.0293240547180176, "learning_rate": 9.713141829309871e-05, "loss": 0.6752, "step": 2046 }, { "epoch": 0.39525004827186716, "grad_norm": 0.7046149969100952, "learning_rate": 9.712579020582218e-05, "loss": 0.6207, "step": 2047 }, { "epoch": 0.39544313574049045, "grad_norm": 0.9247370362281799, "learning_rate": 9.712015676624882e-05, "loss": 0.6749, "step": 2048 }, { "epoch": 0.39563622320911374, "grad_norm": 0.9141973853111267, "learning_rate": 9.711451797501847e-05, "loss": 0.6373, "step": 2049 }, { "epoch": 0.395829310677737, "grad_norm": 0.9949089288711548, "learning_rate": 9.710887383277154e-05, "loss": 0.6347, "step": 2050 }, { "epoch": 0.3960223981463603, "grad_norm": 0.7220017313957214, "learning_rate": 9.710322434014907e-05, "loss": 0.6396, "step": 2051 }, { "epoch": 0.3962154856149836, "grad_norm": 0.7320256233215332, "learning_rate": 9.709756949779268e-05, "loss": 0.6472, "step": 2052 }, { "epoch": 0.3964085730836069, "grad_norm": 1.2128016948699951, "learning_rate": 9.709190930634464e-05, "loss": 0.6649, "step": 2053 }, { "epoch": 0.3966016605522302, "grad_norm": 0.6875364780426025, "learning_rate": 9.708624376644781e-05, "loss": 0.6754, "step": 2054 }, { "epoch": 0.39679474802085346, "grad_norm": 1.074898600578308, "learning_rate": 9.708057287874562e-05, "loss": 0.6076, "step": 2055 }, { "epoch": 0.39698783548947675, "grad_norm": 1.8511162996292114, "learning_rate": 9.707489664388216e-05, "loss": 0.6771, "step": 2056 }, { "epoch": 0.39718092295810004, "grad_norm": 0.6662883758544922, "learning_rate": 9.706921506250211e-05, "loss": 0.6463, "step": 2057 }, { "epoch": 0.3973740104267233, "grad_norm": 1.0017058849334717, "learning_rate": 9.706352813525074e-05, "loss": 0.6607, "step": 2058 }, { "epoch": 0.3975670978953466, "grad_norm": 0.6708621382713318, "learning_rate": 9.705783586277396e-05, "loss": 0.6905, "step": 2059 }, { "epoch": 0.3977601853639699, "grad_norm": 1.3274545669555664, "learning_rate": 9.705213824571825e-05, "loss": 0.6506, "step": 2060 }, { "epoch": 0.3979532728325932, "grad_norm": 0.7926072478294373, "learning_rate": 9.704643528473072e-05, "loss": 0.6488, "step": 2061 }, { "epoch": 0.3981463603012165, "grad_norm": 0.8002334237098694, "learning_rate": 9.70407269804591e-05, "loss": 0.6579, "step": 2062 }, { "epoch": 0.39833944776983976, "grad_norm": 0.7025941014289856, "learning_rate": 9.703501333355168e-05, "loss": 0.7062, "step": 2063 }, { "epoch": 0.39853253523846305, "grad_norm": 0.9721103310585022, "learning_rate": 9.702929434465738e-05, "loss": 0.6955, "step": 2064 }, { "epoch": 0.39872562270708634, "grad_norm": 0.8779545426368713, "learning_rate": 9.702357001442577e-05, "loss": 0.6321, "step": 2065 }, { "epoch": 0.3989187101757096, "grad_norm": 0.8009744882583618, "learning_rate": 9.701784034350698e-05, "loss": 0.637, "step": 2066 }, { "epoch": 0.39911179764433286, "grad_norm": 1.8232556581497192, "learning_rate": 9.701210533255172e-05, "loss": 0.7615, "step": 2067 }, { "epoch": 0.39930488511295614, "grad_norm": 0.6255858540534973, "learning_rate": 9.700636498221138e-05, "loss": 0.6886, "step": 2068 }, { "epoch": 0.39949797258157943, "grad_norm": 0.736965000629425, "learning_rate": 9.70006192931379e-05, "loss": 0.7108, "step": 2069 }, { "epoch": 0.3996910600502027, "grad_norm": 0.7532983422279358, "learning_rate": 9.699486826598388e-05, "loss": 0.6611, "step": 2070 }, { "epoch": 0.399884147518826, "grad_norm": 0.8830270171165466, "learning_rate": 9.698911190140243e-05, "loss": 0.6763, "step": 2071 }, { "epoch": 0.4000772349874493, "grad_norm": 0.5585283637046814, "learning_rate": 9.698335020004736e-05, "loss": 0.6903, "step": 2072 }, { "epoch": 0.4002703224560726, "grad_norm": 0.6163680553436279, "learning_rate": 9.697758316257306e-05, "loss": 0.7166, "step": 2073 }, { "epoch": 0.40046340992469587, "grad_norm": 1.0224374532699585, "learning_rate": 9.697181078963452e-05, "loss": 0.6559, "step": 2074 }, { "epoch": 0.40065649739331916, "grad_norm": 1.058501124382019, "learning_rate": 9.696603308188732e-05, "loss": 0.692, "step": 2075 }, { "epoch": 0.40084958486194244, "grad_norm": 0.8760178089141846, "learning_rate": 9.696025003998766e-05, "loss": 0.6425, "step": 2076 }, { "epoch": 0.40104267233056573, "grad_norm": 3.0668373107910156, "learning_rate": 9.695446166459237e-05, "loss": 0.7065, "step": 2077 }, { "epoch": 0.401235759799189, "grad_norm": 0.6772164106369019, "learning_rate": 9.694866795635884e-05, "loss": 0.6297, "step": 2078 }, { "epoch": 0.4014288472678123, "grad_norm": 0.8507019877433777, "learning_rate": 9.69428689159451e-05, "loss": 0.7283, "step": 2079 }, { "epoch": 0.4016219347364356, "grad_norm": 0.8579272031784058, "learning_rate": 9.693706454400978e-05, "loss": 0.7433, "step": 2080 }, { "epoch": 0.4018150222050589, "grad_norm": 0.6423399448394775, "learning_rate": 9.69312548412121e-05, "loss": 0.6958, "step": 2081 }, { "epoch": 0.40200810967368217, "grad_norm": 1.0026085376739502, "learning_rate": 9.692543980821189e-05, "loss": 0.6996, "step": 2082 }, { "epoch": 0.40220119714230546, "grad_norm": 0.5742931365966797, "learning_rate": 9.691961944566959e-05, "loss": 0.6675, "step": 2083 }, { "epoch": 0.40239428461092874, "grad_norm": 1.0103346109390259, "learning_rate": 9.691379375424625e-05, "loss": 0.6129, "step": 2084 }, { "epoch": 0.40258737207955203, "grad_norm": 1.4002872705459595, "learning_rate": 9.690796273460354e-05, "loss": 0.6832, "step": 2085 }, { "epoch": 0.4027804595481753, "grad_norm": 0.8471243381500244, "learning_rate": 9.69021263874037e-05, "loss": 0.7219, "step": 2086 }, { "epoch": 0.4029735470167986, "grad_norm": 0.5986262559890747, "learning_rate": 9.689628471330958e-05, "loss": 0.6743, "step": 2087 }, { "epoch": 0.4031666344854219, "grad_norm": 0.5123478770256042, "learning_rate": 9.689043771298468e-05, "loss": 0.7008, "step": 2088 }, { "epoch": 0.4033597219540452, "grad_norm": 0.7429570555686951, "learning_rate": 9.688458538709302e-05, "loss": 0.6876, "step": 2089 }, { "epoch": 0.40355280942266847, "grad_norm": 0.6503498554229736, "learning_rate": 9.687872773629931e-05, "loss": 0.6279, "step": 2090 }, { "epoch": 0.40374589689129176, "grad_norm": 0.5559462308883667, "learning_rate": 9.687286476126885e-05, "loss": 0.6215, "step": 2091 }, { "epoch": 0.40393898435991504, "grad_norm": 0.6674103140830994, "learning_rate": 9.686699646266748e-05, "loss": 0.6397, "step": 2092 }, { "epoch": 0.40413207182853833, "grad_norm": 1.2193019390106201, "learning_rate": 9.686112284116171e-05, "loss": 0.634, "step": 2093 }, { "epoch": 0.4043251592971616, "grad_norm": 0.5798696875572205, "learning_rate": 9.685524389741864e-05, "loss": 0.645, "step": 2094 }, { "epoch": 0.4045182467657849, "grad_norm": 2.5379645824432373, "learning_rate": 9.684935963210598e-05, "loss": 0.6596, "step": 2095 }, { "epoch": 0.4047113342344082, "grad_norm": 0.5806416869163513, "learning_rate": 9.6843470045892e-05, "loss": 0.7319, "step": 2096 }, { "epoch": 0.4049044217030315, "grad_norm": 0.8834619522094727, "learning_rate": 9.683757513944565e-05, "loss": 0.6748, "step": 2097 }, { "epoch": 0.40509750917165477, "grad_norm": 0.5740428566932678, "learning_rate": 9.68316749134364e-05, "loss": 0.6203, "step": 2098 }, { "epoch": 0.40529059664027806, "grad_norm": 0.7449142336845398, "learning_rate": 9.682576936853438e-05, "loss": 0.735, "step": 2099 }, { "epoch": 0.40548368410890134, "grad_norm": 0.8648251891136169, "learning_rate": 9.681985850541034e-05, "loss": 0.6772, "step": 2100 }, { "epoch": 0.40567677157752463, "grad_norm": 0.642600417137146, "learning_rate": 9.681394232473556e-05, "loss": 0.6817, "step": 2101 }, { "epoch": 0.4058698590461479, "grad_norm": 0.8760634064674377, "learning_rate": 9.680802082718199e-05, "loss": 0.7039, "step": 2102 }, { "epoch": 0.4060629465147712, "grad_norm": 0.682060182094574, "learning_rate": 9.680209401342217e-05, "loss": 0.7258, "step": 2103 }, { "epoch": 0.4062560339833945, "grad_norm": 0.8739725947380066, "learning_rate": 9.679616188412923e-05, "loss": 0.6763, "step": 2104 }, { "epoch": 0.4064491214520178, "grad_norm": 0.9422621726989746, "learning_rate": 9.67902244399769e-05, "loss": 0.6835, "step": 2105 }, { "epoch": 0.40664220892064107, "grad_norm": 0.6692370176315308, "learning_rate": 9.678428168163953e-05, "loss": 0.6782, "step": 2106 }, { "epoch": 0.40683529638926436, "grad_norm": 0.893266499042511, "learning_rate": 9.677833360979205e-05, "loss": 0.6462, "step": 2107 }, { "epoch": 0.40702838385788764, "grad_norm": 0.6488014459609985, "learning_rate": 9.677238022511007e-05, "loss": 0.6375, "step": 2108 }, { "epoch": 0.40722147132651093, "grad_norm": 0.9776201844215393, "learning_rate": 9.676642152826967e-05, "loss": 0.6993, "step": 2109 }, { "epoch": 0.4074145587951342, "grad_norm": 0.8124518990516663, "learning_rate": 9.676045751994764e-05, "loss": 0.7011, "step": 2110 }, { "epoch": 0.4076076462637575, "grad_norm": 1.1938745975494385, "learning_rate": 9.675448820082134e-05, "loss": 0.6424, "step": 2111 }, { "epoch": 0.4078007337323808, "grad_norm": 1.4950124025344849, "learning_rate": 9.674851357156874e-05, "loss": 0.651, "step": 2112 }, { "epoch": 0.4079938212010041, "grad_norm": 1.1327846050262451, "learning_rate": 9.674253363286838e-05, "loss": 0.6659, "step": 2113 }, { "epoch": 0.40818690866962737, "grad_norm": 0.9231230020523071, "learning_rate": 9.673654838539947e-05, "loss": 0.6783, "step": 2114 }, { "epoch": 0.4083799961382506, "grad_norm": 0.8122774958610535, "learning_rate": 9.673055782984174e-05, "loss": 0.6875, "step": 2115 }, { "epoch": 0.4085730836068739, "grad_norm": 0.9157958626747131, "learning_rate": 9.67245619668756e-05, "loss": 0.6873, "step": 2116 }, { "epoch": 0.4087661710754972, "grad_norm": 1.4033539295196533, "learning_rate": 9.671856079718203e-05, "loss": 0.5916, "step": 2117 }, { "epoch": 0.40895925854412046, "grad_norm": 0.7888469099998474, "learning_rate": 9.671255432144257e-05, "loss": 0.6496, "step": 2118 }, { "epoch": 0.40915234601274375, "grad_norm": 0.8857483267784119, "learning_rate": 9.670654254033944e-05, "loss": 0.6604, "step": 2119 }, { "epoch": 0.40934543348136704, "grad_norm": 1.4208102226257324, "learning_rate": 9.670052545455542e-05, "loss": 0.6732, "step": 2120 }, { "epoch": 0.4095385209499903, "grad_norm": 0.6150051355361938, "learning_rate": 9.669450306477389e-05, "loss": 0.6299, "step": 2121 }, { "epoch": 0.4097316084186136, "grad_norm": 0.9208278656005859, "learning_rate": 9.668847537167885e-05, "loss": 0.6248, "step": 2122 }, { "epoch": 0.4099246958872369, "grad_norm": 0.7390231490135193, "learning_rate": 9.668244237595487e-05, "loss": 0.6307, "step": 2123 }, { "epoch": 0.4101177833558602, "grad_norm": 0.8486349582672119, "learning_rate": 9.667640407828717e-05, "loss": 0.5802, "step": 2124 }, { "epoch": 0.4103108708244835, "grad_norm": 1.0951178073883057, "learning_rate": 9.667036047936155e-05, "loss": 0.6681, "step": 2125 }, { "epoch": 0.41050395829310676, "grad_norm": 0.9737458825111389, "learning_rate": 9.66643115798644e-05, "loss": 0.6213, "step": 2126 }, { "epoch": 0.41069704576173005, "grad_norm": 0.9173923134803772, "learning_rate": 9.665825738048273e-05, "loss": 0.7054, "step": 2127 }, { "epoch": 0.41089013323035334, "grad_norm": 1.4449328184127808, "learning_rate": 9.665219788190414e-05, "loss": 0.7117, "step": 2128 }, { "epoch": 0.4110832206989766, "grad_norm": 1.8331079483032227, "learning_rate": 9.664613308481683e-05, "loss": 0.6496, "step": 2129 }, { "epoch": 0.4112763081675999, "grad_norm": 1.9013755321502686, "learning_rate": 9.664006298990959e-05, "loss": 0.6905, "step": 2130 }, { "epoch": 0.4114693956362232, "grad_norm": 1.147947907447815, "learning_rate": 9.663398759787187e-05, "loss": 0.6465, "step": 2131 }, { "epoch": 0.4116624831048465, "grad_norm": 0.7271201610565186, "learning_rate": 9.662790690939365e-05, "loss": 0.6537, "step": 2132 }, { "epoch": 0.4118555705734698, "grad_norm": 0.8337309956550598, "learning_rate": 9.662182092516557e-05, "loss": 0.6365, "step": 2133 }, { "epoch": 0.41204865804209306, "grad_norm": 1.484628438949585, "learning_rate": 9.66157296458788e-05, "loss": 0.6247, "step": 2134 }, { "epoch": 0.41224174551071635, "grad_norm": 0.9642316102981567, "learning_rate": 9.660963307222519e-05, "loss": 0.6243, "step": 2135 }, { "epoch": 0.41243483297933964, "grad_norm": 2.613215684890747, "learning_rate": 9.660353120489716e-05, "loss": 0.6634, "step": 2136 }, { "epoch": 0.4126279204479629, "grad_norm": 1.030038595199585, "learning_rate": 9.659742404458772e-05, "loss": 0.6081, "step": 2137 }, { "epoch": 0.4128210079165862, "grad_norm": 1.9329754114151, "learning_rate": 9.659131159199046e-05, "loss": 0.6741, "step": 2138 }, { "epoch": 0.4130140953852095, "grad_norm": 0.7088358998298645, "learning_rate": 9.658519384779964e-05, "loss": 0.5984, "step": 2139 }, { "epoch": 0.4132071828538328, "grad_norm": 0.7329550385475159, "learning_rate": 9.657907081271006e-05, "loss": 0.6007, "step": 2140 }, { "epoch": 0.4134002703224561, "grad_norm": 0.7249340415000916, "learning_rate": 9.657294248741715e-05, "loss": 0.6454, "step": 2141 }, { "epoch": 0.41359335779107936, "grad_norm": 0.6823620796203613, "learning_rate": 9.656680887261693e-05, "loss": 0.6368, "step": 2142 }, { "epoch": 0.41378644525970265, "grad_norm": 1.2156535387039185, "learning_rate": 9.656066996900601e-05, "loss": 0.6181, "step": 2143 }, { "epoch": 0.41397953272832594, "grad_norm": 1.3191896677017212, "learning_rate": 9.655452577728164e-05, "loss": 0.7419, "step": 2144 }, { "epoch": 0.4141726201969492, "grad_norm": 0.6475149989128113, "learning_rate": 9.654837629814164e-05, "loss": 0.6159, "step": 2145 }, { "epoch": 0.4143657076655725, "grad_norm": 0.9029530882835388, "learning_rate": 9.65422215322844e-05, "loss": 0.7217, "step": 2146 }, { "epoch": 0.4145587951341958, "grad_norm": 0.9543362855911255, "learning_rate": 9.6536061480409e-05, "loss": 0.6657, "step": 2147 }, { "epoch": 0.4147518826028191, "grad_norm": 0.7387669682502747, "learning_rate": 9.652989614321504e-05, "loss": 0.6939, "step": 2148 }, { "epoch": 0.4149449700714424, "grad_norm": 1.7265998125076294, "learning_rate": 9.652372552140272e-05, "loss": 0.7301, "step": 2149 }, { "epoch": 0.41513805754006566, "grad_norm": 0.849612832069397, "learning_rate": 9.65175496156729e-05, "loss": 0.7085, "step": 2150 }, { "epoch": 0.41533114500868895, "grad_norm": 0.8606327176094055, "learning_rate": 9.651136842672702e-05, "loss": 0.6476, "step": 2151 }, { "epoch": 0.41552423247731224, "grad_norm": 1.0934696197509766, "learning_rate": 9.650518195526705e-05, "loss": 0.7028, "step": 2152 }, { "epoch": 0.4157173199459355, "grad_norm": 0.7838343977928162, "learning_rate": 9.649899020199566e-05, "loss": 0.6643, "step": 2153 }, { "epoch": 0.4159104074145588, "grad_norm": 1.2027345895767212, "learning_rate": 9.649279316761608e-05, "loss": 0.6706, "step": 2154 }, { "epoch": 0.4161034948831821, "grad_norm": 1.4937772750854492, "learning_rate": 9.648659085283211e-05, "loss": 0.7345, "step": 2155 }, { "epoch": 0.4162965823518054, "grad_norm": 1.0963600873947144, "learning_rate": 9.64803832583482e-05, "loss": 0.6916, "step": 2156 }, { "epoch": 0.4164896698204287, "grad_norm": 2.9454100131988525, "learning_rate": 9.647417038486935e-05, "loss": 0.6301, "step": 2157 }, { "epoch": 0.41668275728905196, "grad_norm": 0.9281861782073975, "learning_rate": 9.64679522331012e-05, "loss": 0.7122, "step": 2158 }, { "epoch": 0.41687584475767525, "grad_norm": 1.159866452217102, "learning_rate": 9.646172880375e-05, "loss": 0.6325, "step": 2159 }, { "epoch": 0.41706893222629854, "grad_norm": 1.4899334907531738, "learning_rate": 9.645550009752253e-05, "loss": 0.6197, "step": 2160 }, { "epoch": 0.4172620196949218, "grad_norm": 1.4082077741622925, "learning_rate": 9.644926611512623e-05, "loss": 0.6818, "step": 2161 }, { "epoch": 0.41745510716354506, "grad_norm": 0.959583580493927, "learning_rate": 9.644302685726913e-05, "loss": 0.6219, "step": 2162 }, { "epoch": 0.41764819463216835, "grad_norm": 1.5002087354660034, "learning_rate": 9.643678232465986e-05, "loss": 0.6923, "step": 2163 }, { "epoch": 0.41784128210079163, "grad_norm": 1.6579053401947021, "learning_rate": 9.64305325180076e-05, "loss": 0.6464, "step": 2164 }, { "epoch": 0.4180343695694149, "grad_norm": 3.163287401199341, "learning_rate": 9.642427743802222e-05, "loss": 0.618, "step": 2165 }, { "epoch": 0.4182274570380382, "grad_norm": 0.8907997608184814, "learning_rate": 9.641801708541411e-05, "loss": 0.6369, "step": 2166 }, { "epoch": 0.4184205445066615, "grad_norm": 1.709773063659668, "learning_rate": 9.641175146089429e-05, "loss": 0.6484, "step": 2167 }, { "epoch": 0.4186136319752848, "grad_norm": 0.7581996917724609, "learning_rate": 9.640548056517437e-05, "loss": 0.6451, "step": 2168 }, { "epoch": 0.41880671944390807, "grad_norm": 0.7713298201560974, "learning_rate": 9.63992043989666e-05, "loss": 0.7649, "step": 2169 }, { "epoch": 0.41899980691253136, "grad_norm": 1.2634636163711548, "learning_rate": 9.639292296298374e-05, "loss": 0.6945, "step": 2170 }, { "epoch": 0.41919289438115465, "grad_norm": 1.8603992462158203, "learning_rate": 9.638663625793925e-05, "loss": 0.6809, "step": 2171 }, { "epoch": 0.41938598184977793, "grad_norm": 2.839536190032959, "learning_rate": 9.638034428454712e-05, "loss": 0.5916, "step": 2172 }, { "epoch": 0.4195790693184012, "grad_norm": 1.0049793720245361, "learning_rate": 9.637404704352196e-05, "loss": 0.6804, "step": 2173 }, { "epoch": 0.4197721567870245, "grad_norm": 1.2186213731765747, "learning_rate": 9.636774453557895e-05, "loss": 0.7185, "step": 2174 }, { "epoch": 0.4199652442556478, "grad_norm": 1.5248589515686035, "learning_rate": 9.636143676143395e-05, "loss": 0.6249, "step": 2175 }, { "epoch": 0.4201583317242711, "grad_norm": 0.6700448393821716, "learning_rate": 9.635512372180333e-05, "loss": 0.6458, "step": 2176 }, { "epoch": 0.42035141919289437, "grad_norm": 0.8223052620887756, "learning_rate": 9.63488054174041e-05, "loss": 0.6762, "step": 2177 }, { "epoch": 0.42054450666151766, "grad_norm": 0.9074676036834717, "learning_rate": 9.634248184895385e-05, "loss": 0.623, "step": 2178 }, { "epoch": 0.42073759413014095, "grad_norm": 2.523494243621826, "learning_rate": 9.63361530171708e-05, "loss": 0.6753, "step": 2179 }, { "epoch": 0.42093068159876423, "grad_norm": 0.782922625541687, "learning_rate": 9.63298189227737e-05, "loss": 0.7123, "step": 2180 }, { "epoch": 0.4211237690673875, "grad_norm": 0.9789798259735107, "learning_rate": 9.6323479566482e-05, "loss": 0.6137, "step": 2181 }, { "epoch": 0.4213168565360108, "grad_norm": 0.8360668420791626, "learning_rate": 9.631713494901566e-05, "loss": 0.6628, "step": 2182 }, { "epoch": 0.4215099440046341, "grad_norm": 1.084025502204895, "learning_rate": 9.631078507109525e-05, "loss": 0.6698, "step": 2183 }, { "epoch": 0.4217030314732574, "grad_norm": 0.9836483597755432, "learning_rate": 9.6304429933442e-05, "loss": 0.6839, "step": 2184 }, { "epoch": 0.42189611894188067, "grad_norm": 1.0710245370864868, "learning_rate": 9.629806953677764e-05, "loss": 0.658, "step": 2185 }, { "epoch": 0.42208920641050396, "grad_norm": 0.8995723128318787, "learning_rate": 9.62917038818246e-05, "loss": 0.7375, "step": 2186 }, { "epoch": 0.42228229387912725, "grad_norm": 1.059083104133606, "learning_rate": 9.628533296930583e-05, "loss": 0.6666, "step": 2187 }, { "epoch": 0.42247538134775053, "grad_norm": 1.309341311454773, "learning_rate": 9.627895679994493e-05, "loss": 0.651, "step": 2188 }, { "epoch": 0.4226684688163738, "grad_norm": 1.1353890895843506, "learning_rate": 9.627257537446601e-05, "loss": 0.6884, "step": 2189 }, { "epoch": 0.4228615562849971, "grad_norm": 0.8550171852111816, "learning_rate": 9.62661886935939e-05, "loss": 0.685, "step": 2190 }, { "epoch": 0.4230546437536204, "grad_norm": 1.2330129146575928, "learning_rate": 9.625979675805396e-05, "loss": 0.611, "step": 2191 }, { "epoch": 0.4232477312222437, "grad_norm": 0.9843806624412537, "learning_rate": 9.625339956857212e-05, "loss": 0.7138, "step": 2192 }, { "epoch": 0.42344081869086697, "grad_norm": 3.158026933670044, "learning_rate": 9.624699712587496e-05, "loss": 0.6395, "step": 2193 }, { "epoch": 0.42363390615949026, "grad_norm": 1.4909261465072632, "learning_rate": 9.624058943068963e-05, "loss": 0.6765, "step": 2194 }, { "epoch": 0.42382699362811355, "grad_norm": 1.5778098106384277, "learning_rate": 9.623417648374389e-05, "loss": 0.7036, "step": 2195 }, { "epoch": 0.42402008109673683, "grad_norm": 1.2993004322052002, "learning_rate": 9.622775828576607e-05, "loss": 0.6753, "step": 2196 }, { "epoch": 0.4242131685653601, "grad_norm": 0.9829937815666199, "learning_rate": 9.622133483748514e-05, "loss": 0.663, "step": 2197 }, { "epoch": 0.4244062560339834, "grad_norm": 0.8210488557815552, "learning_rate": 9.621490613963063e-05, "loss": 0.6561, "step": 2198 }, { "epoch": 0.4245993435026067, "grad_norm": 1.8945502042770386, "learning_rate": 9.620847219293266e-05, "loss": 0.6755, "step": 2199 }, { "epoch": 0.42479243097123, "grad_norm": 1.8600435256958008, "learning_rate": 9.620203299812199e-05, "loss": 0.6675, "step": 2200 }, { "epoch": 0.42498551843985327, "grad_norm": 1.6228619813919067, "learning_rate": 9.619558855592994e-05, "loss": 0.6445, "step": 2201 }, { "epoch": 0.42517860590847656, "grad_norm": 1.1327885389328003, "learning_rate": 9.618913886708843e-05, "loss": 0.7004, "step": 2202 }, { "epoch": 0.42537169337709985, "grad_norm": 0.7903538346290588, "learning_rate": 9.618268393232998e-05, "loss": 0.6775, "step": 2203 }, { "epoch": 0.42556478084572313, "grad_norm": 1.3246452808380127, "learning_rate": 9.617622375238772e-05, "loss": 0.5949, "step": 2204 }, { "epoch": 0.4257578683143464, "grad_norm": 1.2443729639053345, "learning_rate": 9.616975832799535e-05, "loss": 0.6905, "step": 2205 }, { "epoch": 0.4259509557829697, "grad_norm": 1.1472927331924438, "learning_rate": 9.616328765988718e-05, "loss": 0.6501, "step": 2206 }, { "epoch": 0.426144043251593, "grad_norm": 1.880237102508545, "learning_rate": 9.615681174879812e-05, "loss": 0.604, "step": 2207 }, { "epoch": 0.4263371307202163, "grad_norm": 1.057161569595337, "learning_rate": 9.615033059546368e-05, "loss": 0.7139, "step": 2208 }, { "epoch": 0.42653021818883957, "grad_norm": 3.7181100845336914, "learning_rate": 9.614384420061992e-05, "loss": 0.6739, "step": 2209 }, { "epoch": 0.4267233056574628, "grad_norm": 3.211777925491333, "learning_rate": 9.613735256500358e-05, "loss": 0.6961, "step": 2210 }, { "epoch": 0.4269163931260861, "grad_norm": 2.3468949794769287, "learning_rate": 9.61308556893519e-05, "loss": 0.6468, "step": 2211 }, { "epoch": 0.4271094805947094, "grad_norm": 2.369316339492798, "learning_rate": 9.612435357440279e-05, "loss": 0.612, "step": 2212 }, { "epoch": 0.42730256806333267, "grad_norm": 1.2178980112075806, "learning_rate": 9.611784622089471e-05, "loss": 0.766, "step": 2213 }, { "epoch": 0.42749565553195595, "grad_norm": 1.128402590751648, "learning_rate": 9.611133362956672e-05, "loss": 0.6825, "step": 2214 }, { "epoch": 0.42768874300057924, "grad_norm": 1.2934259176254272, "learning_rate": 9.61048158011585e-05, "loss": 0.6628, "step": 2215 }, { "epoch": 0.42788183046920253, "grad_norm": 1.2033578157424927, "learning_rate": 9.609829273641034e-05, "loss": 0.7093, "step": 2216 }, { "epoch": 0.4280749179378258, "grad_norm": 0.9026679992675781, "learning_rate": 9.609176443606305e-05, "loss": 0.6279, "step": 2217 }, { "epoch": 0.4282680054064491, "grad_norm": 1.1580013036727905, "learning_rate": 9.60852309008581e-05, "loss": 0.7902, "step": 2218 }, { "epoch": 0.4284610928750724, "grad_norm": 1.0054168701171875, "learning_rate": 9.607869213153752e-05, "loss": 0.6753, "step": 2219 }, { "epoch": 0.4286541803436957, "grad_norm": 1.3336938619613647, "learning_rate": 9.607214812884396e-05, "loss": 0.6594, "step": 2220 }, { "epoch": 0.42884726781231897, "grad_norm": 1.0889724493026733, "learning_rate": 9.606559889352064e-05, "loss": 0.6299, "step": 2221 }, { "epoch": 0.42904035528094225, "grad_norm": 0.8393813967704773, "learning_rate": 9.605904442631144e-05, "loss": 0.6605, "step": 2222 }, { "epoch": 0.42923344274956554, "grad_norm": 0.8404357433319092, "learning_rate": 9.60524847279607e-05, "loss": 0.7118, "step": 2223 }, { "epoch": 0.42942653021818883, "grad_norm": 1.2496833801269531, "learning_rate": 9.604591979921349e-05, "loss": 0.6656, "step": 2224 }, { "epoch": 0.4296196176868121, "grad_norm": 0.7175661325454712, "learning_rate": 9.60393496408154e-05, "loss": 0.6741, "step": 2225 }, { "epoch": 0.4298127051554354, "grad_norm": 0.9804867506027222, "learning_rate": 9.603277425351265e-05, "loss": 0.6034, "step": 2226 }, { "epoch": 0.4300057926240587, "grad_norm": 0.9635022282600403, "learning_rate": 9.602619363805204e-05, "loss": 0.6486, "step": 2227 }, { "epoch": 0.430198880092682, "grad_norm": 0.8057853579521179, "learning_rate": 9.601960779518091e-05, "loss": 0.6468, "step": 2228 }, { "epoch": 0.43039196756130527, "grad_norm": 1.2830544710159302, "learning_rate": 9.60130167256473e-05, "loss": 0.6106, "step": 2229 }, { "epoch": 0.43058505502992855, "grad_norm": 0.8884519338607788, "learning_rate": 9.600642043019978e-05, "loss": 0.7, "step": 2230 }, { "epoch": 0.43077814249855184, "grad_norm": 0.7880727052688599, "learning_rate": 9.599981890958751e-05, "loss": 0.5672, "step": 2231 }, { "epoch": 0.43097122996717513, "grad_norm": 1.501728892326355, "learning_rate": 9.599321216456025e-05, "loss": 0.6005, "step": 2232 }, { "epoch": 0.4311643174357984, "grad_norm": 1.2837351560592651, "learning_rate": 9.598660019586839e-05, "loss": 0.7889, "step": 2233 }, { "epoch": 0.4313574049044217, "grad_norm": 1.1547712087631226, "learning_rate": 9.597998300426285e-05, "loss": 0.6738, "step": 2234 }, { "epoch": 0.431550492373045, "grad_norm": 1.049505352973938, "learning_rate": 9.597336059049519e-05, "loss": 0.6722, "step": 2235 }, { "epoch": 0.4317435798416683, "grad_norm": 0.8678582906723022, "learning_rate": 9.596673295531753e-05, "loss": 0.7614, "step": 2236 }, { "epoch": 0.43193666731029157, "grad_norm": 10.454094886779785, "learning_rate": 9.596010009948264e-05, "loss": 0.6832, "step": 2237 }, { "epoch": 0.43212975477891485, "grad_norm": 1.3981162309646606, "learning_rate": 9.59534620237438e-05, "loss": 0.6538, "step": 2238 }, { "epoch": 0.43232284224753814, "grad_norm": 0.9288433790206909, "learning_rate": 9.594681872885495e-05, "loss": 0.7268, "step": 2239 }, { "epoch": 0.43251592971616143, "grad_norm": 1.042467713356018, "learning_rate": 9.594017021557062e-05, "loss": 0.6124, "step": 2240 }, { "epoch": 0.4327090171847847, "grad_norm": 0.8303380012512207, "learning_rate": 9.593351648464588e-05, "loss": 0.6755, "step": 2241 }, { "epoch": 0.432902104653408, "grad_norm": 0.6281551718711853, "learning_rate": 9.592685753683644e-05, "loss": 0.6247, "step": 2242 }, { "epoch": 0.4330951921220313, "grad_norm": 0.8884482979774475, "learning_rate": 9.592019337289858e-05, "loss": 0.6545, "step": 2243 }, { "epoch": 0.4332882795906546, "grad_norm": 0.9330300688743591, "learning_rate": 9.591352399358918e-05, "loss": 0.6614, "step": 2244 }, { "epoch": 0.43348136705927787, "grad_norm": 0.9302452206611633, "learning_rate": 9.590684939966572e-05, "loss": 0.7436, "step": 2245 }, { "epoch": 0.43367445452790115, "grad_norm": 0.9201809763908386, "learning_rate": 9.590016959188626e-05, "loss": 0.6946, "step": 2246 }, { "epoch": 0.43386754199652444, "grad_norm": 0.8792575001716614, "learning_rate": 9.589348457100946e-05, "loss": 0.6375, "step": 2247 }, { "epoch": 0.43406062946514773, "grad_norm": 0.8499538898468018, "learning_rate": 9.588679433779457e-05, "loss": 0.7227, "step": 2248 }, { "epoch": 0.434253716933771, "grad_norm": 14.176217079162598, "learning_rate": 9.588009889300145e-05, "loss": 0.6604, "step": 2249 }, { "epoch": 0.4344468044023943, "grad_norm": 0.7479870915412903, "learning_rate": 9.587339823739049e-05, "loss": 0.6051, "step": 2250 }, { "epoch": 0.4346398918710176, "grad_norm": 0.9424901604652405, "learning_rate": 9.586669237172275e-05, "loss": 0.623, "step": 2251 }, { "epoch": 0.4348329793396409, "grad_norm": 0.7870872020721436, "learning_rate": 9.585998129675981e-05, "loss": 0.627, "step": 2252 }, { "epoch": 0.43502606680826417, "grad_norm": 0.8299574851989746, "learning_rate": 9.585326501326394e-05, "loss": 0.6217, "step": 2253 }, { "epoch": 0.43521915427688745, "grad_norm": 0.9479416608810425, "learning_rate": 9.584654352199789e-05, "loss": 0.6867, "step": 2254 }, { "epoch": 0.43541224174551074, "grad_norm": 0.7647437453269958, "learning_rate": 9.583981682372505e-05, "loss": 0.6684, "step": 2255 }, { "epoch": 0.43560532921413403, "grad_norm": 1.2445721626281738, "learning_rate": 9.583308491920943e-05, "loss": 0.6889, "step": 2256 }, { "epoch": 0.43579841668275726, "grad_norm": 0.7412508130073547, "learning_rate": 9.582634780921558e-05, "loss": 0.641, "step": 2257 }, { "epoch": 0.43599150415138055, "grad_norm": 0.7279082536697388, "learning_rate": 9.581960549450868e-05, "loss": 0.6347, "step": 2258 }, { "epoch": 0.43618459162000384, "grad_norm": 0.7418180108070374, "learning_rate": 9.581285797585449e-05, "loss": 0.6207, "step": 2259 }, { "epoch": 0.4363776790886271, "grad_norm": 1.5564873218536377, "learning_rate": 9.580610525401934e-05, "loss": 0.6409, "step": 2260 }, { "epoch": 0.4365707665572504, "grad_norm": 7.816547870635986, "learning_rate": 9.579934732977018e-05, "loss": 0.7105, "step": 2261 }, { "epoch": 0.4367638540258737, "grad_norm": 0.7109919190406799, "learning_rate": 9.579258420387453e-05, "loss": 0.6316, "step": 2262 }, { "epoch": 0.436956941494497, "grad_norm": 0.9184997081756592, "learning_rate": 9.578581587710052e-05, "loss": 0.7015, "step": 2263 }, { "epoch": 0.4371500289631203, "grad_norm": 1.5691320896148682, "learning_rate": 9.577904235021686e-05, "loss": 0.6793, "step": 2264 }, { "epoch": 0.43734311643174356, "grad_norm": 1.3229780197143555, "learning_rate": 9.577226362399285e-05, "loss": 0.6484, "step": 2265 }, { "epoch": 0.43753620390036685, "grad_norm": 1.012924075126648, "learning_rate": 9.576547969919838e-05, "loss": 0.6424, "step": 2266 }, { "epoch": 0.43772929136899014, "grad_norm": 1.0676026344299316, "learning_rate": 9.575869057660393e-05, "loss": 0.6682, "step": 2267 }, { "epoch": 0.4379223788376134, "grad_norm": 0.9729299545288086, "learning_rate": 9.575189625698056e-05, "loss": 0.6343, "step": 2268 }, { "epoch": 0.4381154663062367, "grad_norm": 4.779476642608643, "learning_rate": 9.574509674109997e-05, "loss": 0.717, "step": 2269 }, { "epoch": 0.43830855377486, "grad_norm": 1.6406171321868896, "learning_rate": 9.573829202973438e-05, "loss": 0.6951, "step": 2270 }, { "epoch": 0.4385016412434833, "grad_norm": 0.6935713887214661, "learning_rate": 9.573148212365664e-05, "loss": 0.6751, "step": 2271 }, { "epoch": 0.4386947287121066, "grad_norm": 1.0834238529205322, "learning_rate": 9.572466702364019e-05, "loss": 0.7056, "step": 2272 }, { "epoch": 0.43888781618072986, "grad_norm": 0.8029088377952576, "learning_rate": 9.571784673045905e-05, "loss": 0.6856, "step": 2273 }, { "epoch": 0.43908090364935315, "grad_norm": 0.9711720943450928, "learning_rate": 9.571102124488783e-05, "loss": 0.6099, "step": 2274 }, { "epoch": 0.43927399111797644, "grad_norm": 0.6568674445152283, "learning_rate": 9.570419056770173e-05, "loss": 0.6726, "step": 2275 }, { "epoch": 0.4394670785865997, "grad_norm": 1.154329776763916, "learning_rate": 9.569735469967656e-05, "loss": 0.6775, "step": 2276 }, { "epoch": 0.439660166055223, "grad_norm": 1.591646671295166, "learning_rate": 9.569051364158868e-05, "loss": 0.6115, "step": 2277 }, { "epoch": 0.4398532535238463, "grad_norm": 0.7192596793174744, "learning_rate": 9.568366739421506e-05, "loss": 0.6791, "step": 2278 }, { "epoch": 0.4400463409924696, "grad_norm": 0.9235736727714539, "learning_rate": 9.567681595833326e-05, "loss": 0.7622, "step": 2279 }, { "epoch": 0.4402394284610929, "grad_norm": 0.8865248560905457, "learning_rate": 9.566995933472147e-05, "loss": 0.6515, "step": 2280 }, { "epoch": 0.44043251592971616, "grad_norm": 0.7772610783576965, "learning_rate": 9.566309752415838e-05, "loss": 0.6774, "step": 2281 }, { "epoch": 0.44062560339833945, "grad_norm": 1.0794005393981934, "learning_rate": 9.565623052742334e-05, "loss": 0.6534, "step": 2282 }, { "epoch": 0.44081869086696274, "grad_norm": 1.073345422744751, "learning_rate": 9.564935834529624e-05, "loss": 0.6631, "step": 2283 }, { "epoch": 0.441011778335586, "grad_norm": 0.8027778267860413, "learning_rate": 9.564248097855763e-05, "loss": 0.6467, "step": 2284 }, { "epoch": 0.4412048658042093, "grad_norm": 0.6632823944091797, "learning_rate": 9.563559842798859e-05, "loss": 0.6918, "step": 2285 }, { "epoch": 0.4413979532728326, "grad_norm": 0.7355054616928101, "learning_rate": 9.562871069437079e-05, "loss": 0.679, "step": 2286 }, { "epoch": 0.4415910407414559, "grad_norm": 0.6167284846305847, "learning_rate": 9.56218177784865e-05, "loss": 0.6624, "step": 2287 }, { "epoch": 0.4417841282100792, "grad_norm": 1.2047579288482666, "learning_rate": 9.56149196811186e-05, "loss": 0.6255, "step": 2288 }, { "epoch": 0.44197721567870246, "grad_norm": 0.7147185206413269, "learning_rate": 9.560801640305052e-05, "loss": 0.6057, "step": 2289 }, { "epoch": 0.44217030314732575, "grad_norm": 1.1083602905273438, "learning_rate": 9.560110794506632e-05, "loss": 0.6084, "step": 2290 }, { "epoch": 0.44236339061594904, "grad_norm": 0.8798747658729553, "learning_rate": 9.55941943079506e-05, "loss": 0.6102, "step": 2291 }, { "epoch": 0.4425564780845723, "grad_norm": 0.7500741481781006, "learning_rate": 9.558727549248858e-05, "loss": 0.6532, "step": 2292 }, { "epoch": 0.4427495655531956, "grad_norm": 0.6679942607879639, "learning_rate": 9.558035149946607e-05, "loss": 0.6568, "step": 2293 }, { "epoch": 0.4429426530218189, "grad_norm": 0.7481060028076172, "learning_rate": 9.557342232966949e-05, "loss": 0.6376, "step": 2294 }, { "epoch": 0.4431357404904422, "grad_norm": 0.5484201312065125, "learning_rate": 9.556648798388575e-05, "loss": 0.6554, "step": 2295 }, { "epoch": 0.4433288279590655, "grad_norm": 0.8469038605690002, "learning_rate": 9.555954846290247e-05, "loss": 0.6489, "step": 2296 }, { "epoch": 0.44352191542768876, "grad_norm": 0.8696244359016418, "learning_rate": 9.555260376750779e-05, "loss": 0.6573, "step": 2297 }, { "epoch": 0.44371500289631205, "grad_norm": 1.656996488571167, "learning_rate": 9.554565389849044e-05, "loss": 0.6469, "step": 2298 }, { "epoch": 0.44390809036493534, "grad_norm": 0.8446693420410156, "learning_rate": 9.553869885663975e-05, "loss": 0.6927, "step": 2299 }, { "epoch": 0.4441011778335586, "grad_norm": 0.8209642171859741, "learning_rate": 9.553173864274567e-05, "loss": 0.7515, "step": 2300 }, { "epoch": 0.4442942653021819, "grad_norm": 0.9703288674354553, "learning_rate": 9.552477325759866e-05, "loss": 0.6758, "step": 2301 }, { "epoch": 0.4444873527708052, "grad_norm": 0.8291042447090149, "learning_rate": 9.551780270198984e-05, "loss": 0.6768, "step": 2302 }, { "epoch": 0.4446804402394285, "grad_norm": 0.9101136326789856, "learning_rate": 9.551082697671088e-05, "loss": 0.6002, "step": 2303 }, { "epoch": 0.4448735277080518, "grad_norm": 0.9158825278282166, "learning_rate": 9.550384608255403e-05, "loss": 0.667, "step": 2304 }, { "epoch": 0.445066615176675, "grad_norm": 1.6595319509506226, "learning_rate": 9.549686002031218e-05, "loss": 0.6748, "step": 2305 }, { "epoch": 0.4452597026452983, "grad_norm": 2.110886335372925, "learning_rate": 9.548986879077872e-05, "loss": 0.6564, "step": 2306 }, { "epoch": 0.4454527901139216, "grad_norm": 1.5904431343078613, "learning_rate": 9.548287239474774e-05, "loss": 0.6283, "step": 2307 }, { "epoch": 0.44564587758254487, "grad_norm": 1.0468813180923462, "learning_rate": 9.547587083301379e-05, "loss": 0.665, "step": 2308 }, { "epoch": 0.44583896505116816, "grad_norm": 0.989469051361084, "learning_rate": 9.54688641063721e-05, "loss": 0.7003, "step": 2309 }, { "epoch": 0.44603205251979144, "grad_norm": 0.7917959690093994, "learning_rate": 9.546185221561848e-05, "loss": 0.6204, "step": 2310 }, { "epoch": 0.44622513998841473, "grad_norm": 1.282388687133789, "learning_rate": 9.545483516154925e-05, "loss": 0.6633, "step": 2311 }, { "epoch": 0.446418227457038, "grad_norm": 2.984170913696289, "learning_rate": 9.54478129449614e-05, "loss": 0.6731, "step": 2312 }, { "epoch": 0.4466113149256613, "grad_norm": 2.6349260807037354, "learning_rate": 9.544078556665248e-05, "loss": 0.7049, "step": 2313 }, { "epoch": 0.4468044023942846, "grad_norm": 4.734752655029297, "learning_rate": 9.543375302742063e-05, "loss": 0.6468, "step": 2314 }, { "epoch": 0.4469974898629079, "grad_norm": 2.788278818130493, "learning_rate": 9.542671532806453e-05, "loss": 0.7118, "step": 2315 }, { "epoch": 0.44719057733153117, "grad_norm": 1.11741042137146, "learning_rate": 9.541967246938353e-05, "loss": 0.7234, "step": 2316 }, { "epoch": 0.44738366480015446, "grad_norm": 2.064194440841675, "learning_rate": 9.54126244521775e-05, "loss": 0.6274, "step": 2317 }, { "epoch": 0.44757675226877774, "grad_norm": 1.2138484716415405, "learning_rate": 9.54055712772469e-05, "loss": 0.6281, "step": 2318 }, { "epoch": 0.44776983973740103, "grad_norm": 1.0105444192886353, "learning_rate": 9.539851294539281e-05, "loss": 0.5867, "step": 2319 }, { "epoch": 0.4479629272060243, "grad_norm": 0.6703458428382874, "learning_rate": 9.539144945741688e-05, "loss": 0.6309, "step": 2320 }, { "epoch": 0.4481560146746476, "grad_norm": 1.1519720554351807, "learning_rate": 9.538438081412133e-05, "loss": 0.6153, "step": 2321 }, { "epoch": 0.4483491021432709, "grad_norm": 0.8789555430412292, "learning_rate": 9.5377307016309e-05, "loss": 0.7035, "step": 2322 }, { "epoch": 0.4485421896118942, "grad_norm": 0.9043219685554504, "learning_rate": 9.537022806478329e-05, "loss": 0.6816, "step": 2323 }, { "epoch": 0.44873527708051747, "grad_norm": 0.7347421646118164, "learning_rate": 9.536314396034816e-05, "loss": 0.7089, "step": 2324 }, { "epoch": 0.44892836454914076, "grad_norm": 0.9342681765556335, "learning_rate": 9.535605470380821e-05, "loss": 0.7389, "step": 2325 }, { "epoch": 0.44912145201776404, "grad_norm": 0.6444578766822815, "learning_rate": 9.534896029596862e-05, "loss": 0.7627, "step": 2326 }, { "epoch": 0.44931453948638733, "grad_norm": 0.6798039674758911, "learning_rate": 9.534186073763509e-05, "loss": 0.7161, "step": 2327 }, { "epoch": 0.4495076269550106, "grad_norm": 0.6782324910163879, "learning_rate": 9.533475602961399e-05, "loss": 0.6768, "step": 2328 }, { "epoch": 0.4497007144236339, "grad_norm": 0.6967975497245789, "learning_rate": 9.53276461727122e-05, "loss": 0.687, "step": 2329 }, { "epoch": 0.4498938018922572, "grad_norm": 0.8809113502502441, "learning_rate": 9.532053116773725e-05, "loss": 0.6191, "step": 2330 }, { "epoch": 0.4500868893608805, "grad_norm": 0.6886032223701477, "learning_rate": 9.531341101549722e-05, "loss": 0.6874, "step": 2331 }, { "epoch": 0.45027997682950377, "grad_norm": 0.7222874164581299, "learning_rate": 9.530628571680075e-05, "loss": 0.6561, "step": 2332 }, { "epoch": 0.45047306429812706, "grad_norm": 1.001546859741211, "learning_rate": 9.529915527245712e-05, "loss": 0.714, "step": 2333 }, { "epoch": 0.45066615176675034, "grad_norm": 1.1578559875488281, "learning_rate": 9.529201968327616e-05, "loss": 0.6727, "step": 2334 }, { "epoch": 0.45085923923537363, "grad_norm": 1.0811829566955566, "learning_rate": 9.52848789500683e-05, "loss": 0.7144, "step": 2335 }, { "epoch": 0.4510523267039969, "grad_norm": 0.6431188583374023, "learning_rate": 9.527773307364454e-05, "loss": 0.7415, "step": 2336 }, { "epoch": 0.4512454141726202, "grad_norm": 3.7666516304016113, "learning_rate": 9.527058205481647e-05, "loss": 0.6975, "step": 2337 }, { "epoch": 0.4514385016412435, "grad_norm": 0.7579663991928101, "learning_rate": 9.526342589439627e-05, "loss": 0.6741, "step": 2338 }, { "epoch": 0.4516315891098668, "grad_norm": 0.862216591835022, "learning_rate": 9.52562645931967e-05, "loss": 0.6997, "step": 2339 }, { "epoch": 0.45182467657849007, "grad_norm": 0.5632801055908203, "learning_rate": 9.52490981520311e-05, "loss": 0.6798, "step": 2340 }, { "epoch": 0.45201776404711336, "grad_norm": 0.6064098477363586, "learning_rate": 9.524192657171338e-05, "loss": 0.6344, "step": 2341 }, { "epoch": 0.45221085151573664, "grad_norm": 0.7306402325630188, "learning_rate": 9.523474985305807e-05, "loss": 0.6285, "step": 2342 }, { "epoch": 0.45240393898435993, "grad_norm": 0.7039294838905334, "learning_rate": 9.522756799688026e-05, "loss": 0.5898, "step": 2343 }, { "epoch": 0.4525970264529832, "grad_norm": 0.945407509803772, "learning_rate": 9.522038100399562e-05, "loss": 0.6361, "step": 2344 }, { "epoch": 0.4527901139216065, "grad_norm": 0.6043581366539001, "learning_rate": 9.521318887522042e-05, "loss": 0.6264, "step": 2345 }, { "epoch": 0.4529832013902298, "grad_norm": 0.6823474764823914, "learning_rate": 9.520599161137149e-05, "loss": 0.6589, "step": 2346 }, { "epoch": 0.4531762888588531, "grad_norm": 0.9118191599845886, "learning_rate": 9.519878921326626e-05, "loss": 0.6152, "step": 2347 }, { "epoch": 0.45336937632747637, "grad_norm": 3.3963730335235596, "learning_rate": 9.519158168172277e-05, "loss": 0.5946, "step": 2348 }, { "epoch": 0.45356246379609966, "grad_norm": 0.7020583152770996, "learning_rate": 9.518436901755957e-05, "loss": 0.6412, "step": 2349 }, { "epoch": 0.45375555126472294, "grad_norm": 0.7376312017440796, "learning_rate": 9.517715122159586e-05, "loss": 0.6245, "step": 2350 }, { "epoch": 0.45394863873334623, "grad_norm": 0.905014157295227, "learning_rate": 9.51699282946514e-05, "loss": 0.697, "step": 2351 }, { "epoch": 0.45414172620196946, "grad_norm": 0.7505062222480774, "learning_rate": 9.516270023754652e-05, "loss": 0.6736, "step": 2352 }, { "epoch": 0.45433481367059275, "grad_norm": 1.69981050491333, "learning_rate": 9.515546705110216e-05, "loss": 0.6412, "step": 2353 }, { "epoch": 0.45452790113921604, "grad_norm": 0.9065603613853455, "learning_rate": 9.51482287361398e-05, "loss": 0.6322, "step": 2354 }, { "epoch": 0.4547209886078393, "grad_norm": 1.2592469453811646, "learning_rate": 9.514098529348156e-05, "loss": 0.6267, "step": 2355 }, { "epoch": 0.4549140760764626, "grad_norm": 0.8484622836112976, "learning_rate": 9.513373672395009e-05, "loss": 0.6484, "step": 2356 }, { "epoch": 0.4551071635450859, "grad_norm": 0.949127197265625, "learning_rate": 9.512648302836866e-05, "loss": 0.6322, "step": 2357 }, { "epoch": 0.4553002510137092, "grad_norm": 0.7492890954017639, "learning_rate": 9.511922420756108e-05, "loss": 0.7094, "step": 2358 }, { "epoch": 0.4554933384823325, "grad_norm": 0.762244462966919, "learning_rate": 9.51119602623518e-05, "loss": 0.6747, "step": 2359 }, { "epoch": 0.45568642595095576, "grad_norm": 9.235099792480469, "learning_rate": 9.51046911935658e-05, "loss": 0.6857, "step": 2360 }, { "epoch": 0.45587951341957905, "grad_norm": 0.7262943387031555, "learning_rate": 9.509741700202868e-05, "loss": 0.6515, "step": 2361 }, { "epoch": 0.45607260088820234, "grad_norm": 0.5803472995758057, "learning_rate": 9.509013768856659e-05, "loss": 0.6773, "step": 2362 }, { "epoch": 0.4562656883568256, "grad_norm": 1.0533355474472046, "learning_rate": 9.508285325400628e-05, "loss": 0.5909, "step": 2363 }, { "epoch": 0.4564587758254489, "grad_norm": 0.9990792274475098, "learning_rate": 9.507556369917507e-05, "loss": 0.6717, "step": 2364 }, { "epoch": 0.4566518632940722, "grad_norm": 0.6170540452003479, "learning_rate": 9.506826902490087e-05, "loss": 0.6288, "step": 2365 }, { "epoch": 0.4568449507626955, "grad_norm": 1.2236545085906982, "learning_rate": 9.506096923201218e-05, "loss": 0.6471, "step": 2366 }, { "epoch": 0.4570380382313188, "grad_norm": 0.8962268829345703, "learning_rate": 9.505366432133808e-05, "loss": 0.6488, "step": 2367 }, { "epoch": 0.45723112569994206, "grad_norm": 0.7168681621551514, "learning_rate": 9.504635429370819e-05, "loss": 0.7163, "step": 2368 }, { "epoch": 0.45742421316856535, "grad_norm": 0.6189460754394531, "learning_rate": 9.503903914995278e-05, "loss": 0.6746, "step": 2369 }, { "epoch": 0.45761730063718864, "grad_norm": 1.182955026626587, "learning_rate": 9.503171889090264e-05, "loss": 0.692, "step": 2370 }, { "epoch": 0.4578103881058119, "grad_norm": 0.6680576801300049, "learning_rate": 9.502439351738917e-05, "loss": 0.6384, "step": 2371 }, { "epoch": 0.4580034755744352, "grad_norm": 1.872998833656311, "learning_rate": 9.501706303024436e-05, "loss": 0.6695, "step": 2372 }, { "epoch": 0.4581965630430585, "grad_norm": 0.6675781011581421, "learning_rate": 9.500972743030077e-05, "loss": 0.7147, "step": 2373 }, { "epoch": 0.4583896505116818, "grad_norm": 1.1352386474609375, "learning_rate": 9.500238671839152e-05, "loss": 0.7246, "step": 2374 }, { "epoch": 0.4585827379803051, "grad_norm": 3.8692030906677246, "learning_rate": 9.499504089535033e-05, "loss": 0.6263, "step": 2375 }, { "epoch": 0.45877582544892836, "grad_norm": 0.9860671758651733, "learning_rate": 9.49876899620115e-05, "loss": 0.6376, "step": 2376 }, { "epoch": 0.45896891291755165, "grad_norm": 1.0634934902191162, "learning_rate": 9.498033391920995e-05, "loss": 0.6488, "step": 2377 }, { "epoch": 0.45916200038617494, "grad_norm": 0.96270751953125, "learning_rate": 9.497297276778108e-05, "loss": 0.6953, "step": 2378 }, { "epoch": 0.4593550878547982, "grad_norm": 0.6421748399734497, "learning_rate": 9.496560650856097e-05, "loss": 0.6275, "step": 2379 }, { "epoch": 0.4595481753234215, "grad_norm": 1.440152883529663, "learning_rate": 9.495823514238622e-05, "loss": 0.6089, "step": 2380 }, { "epoch": 0.4597412627920448, "grad_norm": 1.4392057657241821, "learning_rate": 9.495085867009404e-05, "loss": 0.6275, "step": 2381 }, { "epoch": 0.4599343502606681, "grad_norm": 0.81980299949646, "learning_rate": 9.494347709252222e-05, "loss": 0.6948, "step": 2382 }, { "epoch": 0.4601274377292914, "grad_norm": 0.8361682891845703, "learning_rate": 9.49360904105091e-05, "loss": 0.682, "step": 2383 }, { "epoch": 0.46032052519791467, "grad_norm": 1.2891947031021118, "learning_rate": 9.492869862489364e-05, "loss": 0.6811, "step": 2384 }, { "epoch": 0.46051361266653795, "grad_norm": 0.7491063475608826, "learning_rate": 9.492130173651533e-05, "loss": 0.719, "step": 2385 }, { "epoch": 0.46070670013516124, "grad_norm": 1.0398638248443604, "learning_rate": 9.491389974621429e-05, "loss": 0.7143, "step": 2386 }, { "epoch": 0.4608997876037845, "grad_norm": 0.8325546383857727, "learning_rate": 9.490649265483123e-05, "loss": 0.6762, "step": 2387 }, { "epoch": 0.4610928750724078, "grad_norm": 0.7510936856269836, "learning_rate": 9.489908046320735e-05, "loss": 0.6778, "step": 2388 }, { "epoch": 0.4612859625410311, "grad_norm": 0.865867018699646, "learning_rate": 9.489166317218451e-05, "loss": 0.6765, "step": 2389 }, { "epoch": 0.4614790500096544, "grad_norm": 0.690106213092804, "learning_rate": 9.488424078260514e-05, "loss": 0.6618, "step": 2390 }, { "epoch": 0.4616721374782777, "grad_norm": 0.9348751902580261, "learning_rate": 9.487681329531222e-05, "loss": 0.6016, "step": 2391 }, { "epoch": 0.46186522494690097, "grad_norm": 1.3811051845550537, "learning_rate": 9.486938071114932e-05, "loss": 0.6594, "step": 2392 }, { "epoch": 0.46205831241552425, "grad_norm": 1.0195772647857666, "learning_rate": 9.486194303096062e-05, "loss": 0.7101, "step": 2393 }, { "epoch": 0.46225139988414754, "grad_norm": 0.9355857968330383, "learning_rate": 9.485450025559083e-05, "loss": 0.686, "step": 2394 }, { "epoch": 0.4624444873527708, "grad_norm": 1.122339129447937, "learning_rate": 9.484705238588526e-05, "loss": 0.666, "step": 2395 }, { "epoch": 0.4626375748213941, "grad_norm": 0.949524462223053, "learning_rate": 9.48395994226898e-05, "loss": 0.6278, "step": 2396 }, { "epoch": 0.4628306622900174, "grad_norm": 0.9352090954780579, "learning_rate": 9.483214136685094e-05, "loss": 0.668, "step": 2397 }, { "epoch": 0.4630237497586407, "grad_norm": 0.9672131538391113, "learning_rate": 9.482467821921572e-05, "loss": 0.7334, "step": 2398 }, { "epoch": 0.463216837227264, "grad_norm": 1.1518218517303467, "learning_rate": 9.481720998063173e-05, "loss": 0.7497, "step": 2399 }, { "epoch": 0.4634099246958872, "grad_norm": 0.8457086086273193, "learning_rate": 9.480973665194721e-05, "loss": 0.7209, "step": 2400 }, { "epoch": 0.4636030121645105, "grad_norm": 0.911097526550293, "learning_rate": 9.480225823401092e-05, "loss": 0.6144, "step": 2401 }, { "epoch": 0.4637960996331338, "grad_norm": 0.9181901812553406, "learning_rate": 9.479477472767223e-05, "loss": 0.6307, "step": 2402 }, { "epoch": 0.4639891871017571, "grad_norm": 0.8535168170928955, "learning_rate": 9.478728613378107e-05, "loss": 0.6335, "step": 2403 }, { "epoch": 0.46418227457038036, "grad_norm": 1.604239821434021, "learning_rate": 9.477979245318797e-05, "loss": 0.6853, "step": 2404 }, { "epoch": 0.46437536203900365, "grad_norm": 0.8990112543106079, "learning_rate": 9.477229368674401e-05, "loss": 0.6435, "step": 2405 }, { "epoch": 0.46456844950762693, "grad_norm": 0.912573516368866, "learning_rate": 9.476478983530086e-05, "loss": 0.6264, "step": 2406 }, { "epoch": 0.4647615369762502, "grad_norm": 0.8236774206161499, "learning_rate": 9.475728089971076e-05, "loss": 0.7201, "step": 2407 }, { "epoch": 0.4649546244448735, "grad_norm": 0.5486627221107483, "learning_rate": 9.474976688082655e-05, "loss": 0.6712, "step": 2408 }, { "epoch": 0.4651477119134968, "grad_norm": 0.6953536868095398, "learning_rate": 9.474224777950162e-05, "loss": 0.7367, "step": 2409 }, { "epoch": 0.4653407993821201, "grad_norm": 0.7336874604225159, "learning_rate": 9.473472359658997e-05, "loss": 0.6458, "step": 2410 }, { "epoch": 0.4655338868507434, "grad_norm": 1.7891829013824463, "learning_rate": 9.472719433294613e-05, "loss": 0.745, "step": 2411 }, { "epoch": 0.46572697431936666, "grad_norm": 0.6716744303703308, "learning_rate": 9.471965998942525e-05, "loss": 0.7161, "step": 2412 }, { "epoch": 0.46592006178798995, "grad_norm": 0.7606045007705688, "learning_rate": 9.471212056688304e-05, "loss": 0.6235, "step": 2413 }, { "epoch": 0.46611314925661324, "grad_norm": 0.6802353858947754, "learning_rate": 9.470457606617579e-05, "loss": 0.6521, "step": 2414 }, { "epoch": 0.4663062367252365, "grad_norm": 0.644860565662384, "learning_rate": 9.469702648816034e-05, "loss": 0.7077, "step": 2415 }, { "epoch": 0.4664993241938598, "grad_norm": 2.4989123344421387, "learning_rate": 9.468947183369416e-05, "loss": 0.6665, "step": 2416 }, { "epoch": 0.4666924116624831, "grad_norm": 0.6456822752952576, "learning_rate": 9.468191210363527e-05, "loss": 0.6515, "step": 2417 }, { "epoch": 0.4668854991311064, "grad_norm": 0.6510558724403381, "learning_rate": 9.467434729884224e-05, "loss": 0.6178, "step": 2418 }, { "epoch": 0.4670785865997297, "grad_norm": 1.407997965812683, "learning_rate": 9.466677742017425e-05, "loss": 0.6396, "step": 2419 }, { "epoch": 0.46727167406835296, "grad_norm": 1.0414308309555054, "learning_rate": 9.465920246849105e-05, "loss": 0.6153, "step": 2420 }, { "epoch": 0.46746476153697625, "grad_norm": 1.2164864540100098, "learning_rate": 9.465162244465295e-05, "loss": 0.7353, "step": 2421 }, { "epoch": 0.46765784900559954, "grad_norm": 0.6581920981407166, "learning_rate": 9.464403734952088e-05, "loss": 0.6365, "step": 2422 }, { "epoch": 0.4678509364742228, "grad_norm": 0.6703659892082214, "learning_rate": 9.463644718395628e-05, "loss": 0.6288, "step": 2423 }, { "epoch": 0.4680440239428461, "grad_norm": 1.0855858325958252, "learning_rate": 9.462885194882121e-05, "loss": 0.6719, "step": 2424 }, { "epoch": 0.4682371114114694, "grad_norm": 2.0406551361083984, "learning_rate": 9.462125164497832e-05, "loss": 0.6551, "step": 2425 }, { "epoch": 0.4684301988800927, "grad_norm": 0.8773651123046875, "learning_rate": 9.46136462732908e-05, "loss": 0.6798, "step": 2426 }, { "epoch": 0.468623286348716, "grad_norm": 14.283578872680664, "learning_rate": 9.46060358346224e-05, "loss": 0.7679, "step": 2427 }, { "epoch": 0.46881637381733926, "grad_norm": 1.2852777242660522, "learning_rate": 9.459842032983748e-05, "loss": 0.7316, "step": 2428 }, { "epoch": 0.46900946128596255, "grad_norm": 1.8344560861587524, "learning_rate": 9.459079975980101e-05, "loss": 0.6646, "step": 2429 }, { "epoch": 0.46920254875458584, "grad_norm": 0.8188902139663696, "learning_rate": 9.458317412537848e-05, "loss": 0.6268, "step": 2430 }, { "epoch": 0.4693956362232091, "grad_norm": 0.7133570313453674, "learning_rate": 9.457554342743593e-05, "loss": 0.6071, "step": 2431 }, { "epoch": 0.4695887236918324, "grad_norm": 0.9561595916748047, "learning_rate": 9.456790766684005e-05, "loss": 0.6397, "step": 2432 }, { "epoch": 0.4697818111604557, "grad_norm": 0.8375346660614014, "learning_rate": 9.456026684445805e-05, "loss": 0.622, "step": 2433 }, { "epoch": 0.469974898629079, "grad_norm": 1.118987798690796, "learning_rate": 9.455262096115775e-05, "loss": 0.7047, "step": 2434 }, { "epoch": 0.4701679860977023, "grad_norm": 0.8388708829879761, "learning_rate": 9.454497001780753e-05, "loss": 0.6739, "step": 2435 }, { "epoch": 0.47036107356632556, "grad_norm": 0.8033398985862732, "learning_rate": 9.453731401527633e-05, "loss": 0.6465, "step": 2436 }, { "epoch": 0.47055416103494885, "grad_norm": 1.2090420722961426, "learning_rate": 9.452965295443367e-05, "loss": 0.6586, "step": 2437 }, { "epoch": 0.47074724850357214, "grad_norm": 0.7168941497802734, "learning_rate": 9.452198683614967e-05, "loss": 0.6322, "step": 2438 }, { "epoch": 0.4709403359721954, "grad_norm": 0.7778940200805664, "learning_rate": 9.451431566129503e-05, "loss": 0.6528, "step": 2439 }, { "epoch": 0.4711334234408187, "grad_norm": 1.1000531911849976, "learning_rate": 9.450663943074095e-05, "loss": 0.5985, "step": 2440 }, { "epoch": 0.471326510909442, "grad_norm": 0.8127528429031372, "learning_rate": 9.449895814535928e-05, "loss": 0.6746, "step": 2441 }, { "epoch": 0.4715195983780653, "grad_norm": 0.6919209957122803, "learning_rate": 9.449127180602243e-05, "loss": 0.6345, "step": 2442 }, { "epoch": 0.4717126858466886, "grad_norm": 0.8695315718650818, "learning_rate": 9.448358041360335e-05, "loss": 0.5772, "step": 2443 }, { "epoch": 0.47190577331531186, "grad_norm": 0.8649283647537231, "learning_rate": 9.447588396897562e-05, "loss": 0.6797, "step": 2444 }, { "epoch": 0.47209886078393515, "grad_norm": 0.769500732421875, "learning_rate": 9.446818247301332e-05, "loss": 0.7115, "step": 2445 }, { "epoch": 0.47229194825255844, "grad_norm": 0.791143536567688, "learning_rate": 9.446047592659119e-05, "loss": 0.6011, "step": 2446 }, { "epoch": 0.47248503572118167, "grad_norm": 0.690839946269989, "learning_rate": 9.445276433058446e-05, "loss": 0.6579, "step": 2447 }, { "epoch": 0.47267812318980496, "grad_norm": 0.6376823782920837, "learning_rate": 9.444504768586899e-05, "loss": 0.6491, "step": 2448 }, { "epoch": 0.47287121065842824, "grad_norm": 0.9527469277381897, "learning_rate": 9.44373259933212e-05, "loss": 0.6906, "step": 2449 }, { "epoch": 0.47306429812705153, "grad_norm": 0.6678383350372314, "learning_rate": 9.442959925381806e-05, "loss": 0.6368, "step": 2450 }, { "epoch": 0.4732573855956748, "grad_norm": 0.7298461198806763, "learning_rate": 9.442186746823716e-05, "loss": 0.6685, "step": 2451 }, { "epoch": 0.4734504730642981, "grad_norm": 0.6266881823539734, "learning_rate": 9.44141306374566e-05, "loss": 0.6408, "step": 2452 }, { "epoch": 0.4736435605329214, "grad_norm": 0.7061766982078552, "learning_rate": 9.440638876235512e-05, "loss": 0.6103, "step": 2453 }, { "epoch": 0.4738366480015447, "grad_norm": 1.3384329080581665, "learning_rate": 9.439864184381197e-05, "loss": 0.7113, "step": 2454 }, { "epoch": 0.47402973547016797, "grad_norm": 0.7522358298301697, "learning_rate": 9.439088988270704e-05, "loss": 0.5892, "step": 2455 }, { "epoch": 0.47422282293879126, "grad_norm": 0.9028751254081726, "learning_rate": 9.438313287992074e-05, "loss": 0.669, "step": 2456 }, { "epoch": 0.47441591040741454, "grad_norm": 0.6728627681732178, "learning_rate": 9.437537083633407e-05, "loss": 0.7075, "step": 2457 }, { "epoch": 0.47460899787603783, "grad_norm": 0.652358889579773, "learning_rate": 9.436760375282859e-05, "loss": 0.6655, "step": 2458 }, { "epoch": 0.4748020853446611, "grad_norm": 0.6488227844238281, "learning_rate": 9.435983163028645e-05, "loss": 0.6967, "step": 2459 }, { "epoch": 0.4749951728132844, "grad_norm": 0.9230695366859436, "learning_rate": 9.435205446959037e-05, "loss": 0.6684, "step": 2460 }, { "epoch": 0.4751882602819077, "grad_norm": 1.4415583610534668, "learning_rate": 9.434427227162364e-05, "loss": 0.68, "step": 2461 }, { "epoch": 0.475381347750531, "grad_norm": 0.7525731921195984, "learning_rate": 9.433648503727013e-05, "loss": 0.6554, "step": 2462 }, { "epoch": 0.47557443521915427, "grad_norm": 0.9091187119483948, "learning_rate": 9.432869276741424e-05, "loss": 0.636, "step": 2463 }, { "epoch": 0.47576752268777756, "grad_norm": 0.7591215968132019, "learning_rate": 9.432089546294103e-05, "loss": 0.7542, "step": 2464 }, { "epoch": 0.47596061015640084, "grad_norm": 0.9380583167076111, "learning_rate": 9.431309312473602e-05, "loss": 0.6837, "step": 2465 }, { "epoch": 0.47615369762502413, "grad_norm": 0.8392533659934998, "learning_rate": 9.430528575368538e-05, "loss": 0.6906, "step": 2466 }, { "epoch": 0.4763467850936474, "grad_norm": 0.8512941598892212, "learning_rate": 9.429747335067583e-05, "loss": 0.6664, "step": 2467 }, { "epoch": 0.4765398725622707, "grad_norm": 0.9057575464248657, "learning_rate": 9.428965591659467e-05, "loss": 0.6999, "step": 2468 }, { "epoch": 0.476732960030894, "grad_norm": 0.7209581732749939, "learning_rate": 9.428183345232975e-05, "loss": 0.715, "step": 2469 }, { "epoch": 0.4769260474995173, "grad_norm": 2.236159324645996, "learning_rate": 9.427400595876951e-05, "loss": 0.7405, "step": 2470 }, { "epoch": 0.47711913496814057, "grad_norm": 0.8974485993385315, "learning_rate": 9.426617343680295e-05, "loss": 0.6107, "step": 2471 }, { "epoch": 0.47731222243676386, "grad_norm": 0.7440195679664612, "learning_rate": 9.425833588731966e-05, "loss": 0.6778, "step": 2472 }, { "epoch": 0.47750530990538714, "grad_norm": 1.2347583770751953, "learning_rate": 9.425049331120977e-05, "loss": 0.646, "step": 2473 }, { "epoch": 0.47769839737401043, "grad_norm": 1.131026268005371, "learning_rate": 9.424264570936401e-05, "loss": 0.6947, "step": 2474 }, { "epoch": 0.4778914848426337, "grad_norm": 0.5810536742210388, "learning_rate": 9.423479308267365e-05, "loss": 0.6606, "step": 2475 }, { "epoch": 0.478084572311257, "grad_norm": 0.7615841627120972, "learning_rate": 9.422693543203058e-05, "loss": 0.6546, "step": 2476 }, { "epoch": 0.4782776597798803, "grad_norm": 0.6610844135284424, "learning_rate": 9.42190727583272e-05, "loss": 0.6271, "step": 2477 }, { "epoch": 0.4784707472485036, "grad_norm": 0.7378482222557068, "learning_rate": 9.421120506245653e-05, "loss": 0.6487, "step": 2478 }, { "epoch": 0.47866383471712687, "grad_norm": 0.9379624128341675, "learning_rate": 9.420333234531214e-05, "loss": 0.6501, "step": 2479 }, { "epoch": 0.47885692218575016, "grad_norm": 0.9299198985099792, "learning_rate": 9.419545460778816e-05, "loss": 0.6031, "step": 2480 }, { "epoch": 0.47905000965437344, "grad_norm": 0.8337319493293762, "learning_rate": 9.41875718507793e-05, "loss": 0.6958, "step": 2481 }, { "epoch": 0.47924309712299673, "grad_norm": 0.7880105972290039, "learning_rate": 9.417968407518087e-05, "loss": 0.617, "step": 2482 }, { "epoch": 0.47943618459162, "grad_norm": 0.6068995594978333, "learning_rate": 9.41717912818887e-05, "loss": 0.6243, "step": 2483 }, { "epoch": 0.4796292720602433, "grad_norm": 0.6634697914123535, "learning_rate": 9.41638934717992e-05, "loss": 0.6884, "step": 2484 }, { "epoch": 0.4798223595288666, "grad_norm": 0.771349310874939, "learning_rate": 9.415599064580937e-05, "loss": 0.628, "step": 2485 }, { "epoch": 0.4800154469974899, "grad_norm": 0.804161548614502, "learning_rate": 9.414808280481679e-05, "loss": 0.5864, "step": 2486 }, { "epoch": 0.48020853446611317, "grad_norm": 0.6389366984367371, "learning_rate": 9.41401699497196e-05, "loss": 0.741, "step": 2487 }, { "epoch": 0.48040162193473646, "grad_norm": 1.3017096519470215, "learning_rate": 9.413225208141642e-05, "loss": 0.6268, "step": 2488 }, { "epoch": 0.48059470940335974, "grad_norm": 0.7369444370269775, "learning_rate": 9.412432920080661e-05, "loss": 0.6919, "step": 2489 }, { "epoch": 0.48078779687198303, "grad_norm": 0.6690483093261719, "learning_rate": 9.411640130878998e-05, "loss": 0.6887, "step": 2490 }, { "epoch": 0.4809808843406063, "grad_norm": 0.7737398147583008, "learning_rate": 9.410846840626691e-05, "loss": 0.6662, "step": 2491 }, { "epoch": 0.4811739718092296, "grad_norm": 0.6893042325973511, "learning_rate": 9.410053049413841e-05, "loss": 0.5763, "step": 2492 }, { "epoch": 0.4813670592778529, "grad_norm": 0.8440071940422058, "learning_rate": 9.409258757330603e-05, "loss": 0.6124, "step": 2493 }, { "epoch": 0.4815601467464762, "grad_norm": 0.6539273262023926, "learning_rate": 9.408463964467184e-05, "loss": 0.6044, "step": 2494 }, { "epoch": 0.4817532342150994, "grad_norm": 0.8559293746948242, "learning_rate": 9.407668670913857e-05, "loss": 0.6772, "step": 2495 }, { "epoch": 0.4819463216837227, "grad_norm": 0.8857111930847168, "learning_rate": 9.406872876760946e-05, "loss": 0.7146, "step": 2496 }, { "epoch": 0.482139409152346, "grad_norm": 1.0160536766052246, "learning_rate": 9.406076582098832e-05, "loss": 0.6791, "step": 2497 }, { "epoch": 0.4823324966209693, "grad_norm": 0.6636801362037659, "learning_rate": 9.405279787017953e-05, "loss": 0.726, "step": 2498 }, { "epoch": 0.48252558408959256, "grad_norm": 0.9374979138374329, "learning_rate": 9.404482491608808e-05, "loss": 0.6796, "step": 2499 }, { "epoch": 0.48271867155821585, "grad_norm": 0.6684019565582275, "learning_rate": 9.403684695961948e-05, "loss": 0.6863, "step": 2500 }, { "epoch": 0.48271867155821585, "eval_loss": 0.7051355242729187, "eval_runtime": 49.0819, "eval_samples_per_second": 13.528, "eval_steps_per_second": 0.428, "step": 2500 }, { "epoch": 0.48291175902683914, "grad_norm": 0.642139732837677, "learning_rate": 9.402886400167981e-05, "loss": 0.6329, "step": 2501 }, { "epoch": 0.4831048464954624, "grad_norm": 0.8069069385528564, "learning_rate": 9.402087604317576e-05, "loss": 0.7329, "step": 2502 }, { "epoch": 0.4832979339640857, "grad_norm": 0.6645339131355286, "learning_rate": 9.401288308501453e-05, "loss": 0.6828, "step": 2503 }, { "epoch": 0.483491021432709, "grad_norm": 0.8054146766662598, "learning_rate": 9.400488512810395e-05, "loss": 0.6208, "step": 2504 }, { "epoch": 0.4836841089013323, "grad_norm": 0.6263300776481628, "learning_rate": 9.399688217335234e-05, "loss": 0.633, "step": 2505 }, { "epoch": 0.4838771963699556, "grad_norm": 0.7251235246658325, "learning_rate": 9.398887422166868e-05, "loss": 0.7115, "step": 2506 }, { "epoch": 0.48407028383857886, "grad_norm": 1.3578987121582031, "learning_rate": 9.398086127396245e-05, "loss": 0.6813, "step": 2507 }, { "epoch": 0.48426337130720215, "grad_norm": 0.5470561981201172, "learning_rate": 9.397284333114372e-05, "loss": 0.6775, "step": 2508 }, { "epoch": 0.48445645877582544, "grad_norm": 0.7248647809028625, "learning_rate": 9.396482039412312e-05, "loss": 0.665, "step": 2509 }, { "epoch": 0.4846495462444487, "grad_norm": 0.8897822499275208, "learning_rate": 9.395679246381185e-05, "loss": 0.6696, "step": 2510 }, { "epoch": 0.484842633713072, "grad_norm": 0.9062085747718811, "learning_rate": 9.394875954112169e-05, "loss": 0.6846, "step": 2511 }, { "epoch": 0.4850357211816953, "grad_norm": 0.7134514451026917, "learning_rate": 9.394072162696498e-05, "loss": 0.6855, "step": 2512 }, { "epoch": 0.4852288086503186, "grad_norm": 0.700451672077179, "learning_rate": 9.393267872225462e-05, "loss": 0.642, "step": 2513 }, { "epoch": 0.4854218961189419, "grad_norm": 0.6018043160438538, "learning_rate": 9.392463082790406e-05, "loss": 0.6676, "step": 2514 }, { "epoch": 0.48561498358756516, "grad_norm": 0.7920552492141724, "learning_rate": 9.391657794482736e-05, "loss": 0.7071, "step": 2515 }, { "epoch": 0.48580807105618845, "grad_norm": 0.7233330607414246, "learning_rate": 9.390852007393914e-05, "loss": 0.626, "step": 2516 }, { "epoch": 0.48600115852481174, "grad_norm": 1.1909284591674805, "learning_rate": 9.390045721615451e-05, "loss": 0.6725, "step": 2517 }, { "epoch": 0.486194245993435, "grad_norm": 0.9808858633041382, "learning_rate": 9.389238937238927e-05, "loss": 0.714, "step": 2518 }, { "epoch": 0.4863873334620583, "grad_norm": 0.7388303875923157, "learning_rate": 9.388431654355969e-05, "loss": 0.6485, "step": 2519 }, { "epoch": 0.4865804209306816, "grad_norm": 4.483706951141357, "learning_rate": 9.387623873058265e-05, "loss": 0.6678, "step": 2520 }, { "epoch": 0.4867735083993049, "grad_norm": 0.7220730185508728, "learning_rate": 9.386815593437558e-05, "loss": 0.5672, "step": 2521 }, { "epoch": 0.4869665958679282, "grad_norm": 0.9158986210823059, "learning_rate": 9.386006815585649e-05, "loss": 0.5958, "step": 2522 }, { "epoch": 0.48715968333655146, "grad_norm": 0.6455096006393433, "learning_rate": 9.385197539594393e-05, "loss": 0.6266, "step": 2523 }, { "epoch": 0.48735277080517475, "grad_norm": 0.6307367086410522, "learning_rate": 9.384387765555704e-05, "loss": 0.6919, "step": 2524 }, { "epoch": 0.48754585827379804, "grad_norm": 0.6080496311187744, "learning_rate": 9.383577493561553e-05, "loss": 0.5808, "step": 2525 }, { "epoch": 0.4877389457424213, "grad_norm": 0.6725033521652222, "learning_rate": 9.382766723703964e-05, "loss": 0.6681, "step": 2526 }, { "epoch": 0.4879320332110446, "grad_norm": 0.6512596607208252, "learning_rate": 9.381955456075022e-05, "loss": 0.6665, "step": 2527 }, { "epoch": 0.4881251206796679, "grad_norm": 0.6921634078025818, "learning_rate": 9.381143690766865e-05, "loss": 0.7617, "step": 2528 }, { "epoch": 0.4883182081482912, "grad_norm": 0.9919264912605286, "learning_rate": 9.380331427871691e-05, "loss": 0.695, "step": 2529 }, { "epoch": 0.4885112956169145, "grad_norm": 2.0880444049835205, "learning_rate": 9.379518667481752e-05, "loss": 0.6383, "step": 2530 }, { "epoch": 0.48870438308553776, "grad_norm": 0.5514667630195618, "learning_rate": 9.378705409689355e-05, "loss": 0.5682, "step": 2531 }, { "epoch": 0.48889747055416105, "grad_norm": 0.8211861252784729, "learning_rate": 9.377891654586868e-05, "loss": 0.6691, "step": 2532 }, { "epoch": 0.48909055802278434, "grad_norm": 0.6829741597175598, "learning_rate": 9.37707740226671e-05, "loss": 0.6701, "step": 2533 }, { "epoch": 0.4892836454914076, "grad_norm": 4.862990856170654, "learning_rate": 9.376262652821364e-05, "loss": 0.6874, "step": 2534 }, { "epoch": 0.4894767329600309, "grad_norm": 0.8101476430892944, "learning_rate": 9.375447406343359e-05, "loss": 0.707, "step": 2535 }, { "epoch": 0.4896698204286542, "grad_norm": 0.8637226223945618, "learning_rate": 9.374631662925292e-05, "loss": 0.6821, "step": 2536 }, { "epoch": 0.4898629078972775, "grad_norm": 2.15211820602417, "learning_rate": 9.373815422659806e-05, "loss": 0.7218, "step": 2537 }, { "epoch": 0.4900559953659008, "grad_norm": 0.8090702891349792, "learning_rate": 9.372998685639609e-05, "loss": 0.6781, "step": 2538 }, { "epoch": 0.49024908283452406, "grad_norm": 0.6308301091194153, "learning_rate": 9.37218145195746e-05, "loss": 0.7016, "step": 2539 }, { "epoch": 0.49044217030314735, "grad_norm": 0.5001327991485596, "learning_rate": 9.371363721706177e-05, "loss": 0.5923, "step": 2540 }, { "epoch": 0.49063525777177064, "grad_norm": 0.5805062651634216, "learning_rate": 9.370545494978632e-05, "loss": 0.755, "step": 2541 }, { "epoch": 0.49082834524039387, "grad_norm": 1.5651514530181885, "learning_rate": 9.369726771867756e-05, "loss": 0.6108, "step": 2542 }, { "epoch": 0.49102143270901716, "grad_norm": 0.5223578214645386, "learning_rate": 9.368907552466534e-05, "loss": 0.7416, "step": 2543 }, { "epoch": 0.49121452017764045, "grad_norm": 1.4089345932006836, "learning_rate": 9.36808783686801e-05, "loss": 0.7208, "step": 2544 }, { "epoch": 0.49140760764626373, "grad_norm": 0.6504549384117126, "learning_rate": 9.367267625165283e-05, "loss": 0.6522, "step": 2545 }, { "epoch": 0.491600695114887, "grad_norm": 0.4511851370334625, "learning_rate": 9.366446917451505e-05, "loss": 0.6023, "step": 2546 }, { "epoch": 0.4917937825835103, "grad_norm": 0.4808913469314575, "learning_rate": 9.365625713819893e-05, "loss": 0.6324, "step": 2547 }, { "epoch": 0.4919868700521336, "grad_norm": 1.0025384426116943, "learning_rate": 9.364804014363712e-05, "loss": 0.6588, "step": 2548 }, { "epoch": 0.4921799575207569, "grad_norm": 0.6339395642280579, "learning_rate": 9.363981819176285e-05, "loss": 0.6417, "step": 2549 }, { "epoch": 0.49237304498938017, "grad_norm": 0.42982617020606995, "learning_rate": 9.363159128350996e-05, "loss": 0.7028, "step": 2550 }, { "epoch": 0.49256613245800346, "grad_norm": 0.512961745262146, "learning_rate": 9.362335941981277e-05, "loss": 0.6302, "step": 2551 }, { "epoch": 0.49275921992662675, "grad_norm": 0.702860951423645, "learning_rate": 9.361512260160625e-05, "loss": 0.6789, "step": 2552 }, { "epoch": 0.49295230739525003, "grad_norm": 0.6415925025939941, "learning_rate": 9.360688082982589e-05, "loss": 0.6293, "step": 2553 }, { "epoch": 0.4931453948638733, "grad_norm": 0.4313020706176758, "learning_rate": 9.359863410540774e-05, "loss": 0.6486, "step": 2554 }, { "epoch": 0.4933384823324966, "grad_norm": 1.143450379371643, "learning_rate": 9.359038242928841e-05, "loss": 0.6181, "step": 2555 }, { "epoch": 0.4935315698011199, "grad_norm": 0.5041948556900024, "learning_rate": 9.358212580240511e-05, "loss": 0.7191, "step": 2556 }, { "epoch": 0.4937246572697432, "grad_norm": 0.46899089217185974, "learning_rate": 9.357386422569556e-05, "loss": 0.6501, "step": 2557 }, { "epoch": 0.49391774473836647, "grad_norm": 0.6313336491584778, "learning_rate": 9.356559770009805e-05, "loss": 0.6654, "step": 2558 }, { "epoch": 0.49411083220698976, "grad_norm": 2.0989089012145996, "learning_rate": 9.35573262265515e-05, "loss": 0.5732, "step": 2559 }, { "epoch": 0.49430391967561305, "grad_norm": 0.5683858394622803, "learning_rate": 9.354904980599529e-05, "loss": 0.6148, "step": 2560 }, { "epoch": 0.49449700714423633, "grad_norm": 0.5234796404838562, "learning_rate": 9.354076843936942e-05, "loss": 0.7169, "step": 2561 }, { "epoch": 0.4946900946128596, "grad_norm": 0.49191126227378845, "learning_rate": 9.353248212761447e-05, "loss": 0.6768, "step": 2562 }, { "epoch": 0.4948831820814829, "grad_norm": 0.8083773851394653, "learning_rate": 9.352419087167153e-05, "loss": 0.7235, "step": 2563 }, { "epoch": 0.4950762695501062, "grad_norm": 0.6737403273582458, "learning_rate": 9.35158946724823e-05, "loss": 0.696, "step": 2564 }, { "epoch": 0.4952693570187295, "grad_norm": 0.771583616733551, "learning_rate": 9.350759353098899e-05, "loss": 0.7054, "step": 2565 }, { "epoch": 0.49546244448735277, "grad_norm": 0.7094066143035889, "learning_rate": 9.349928744813443e-05, "loss": 0.6713, "step": 2566 }, { "epoch": 0.49565553195597606, "grad_norm": 1.1471352577209473, "learning_rate": 9.349097642486196e-05, "loss": 0.7217, "step": 2567 }, { "epoch": 0.49584861942459935, "grad_norm": 1.1715819835662842, "learning_rate": 9.34826604621155e-05, "loss": 0.6701, "step": 2568 }, { "epoch": 0.49604170689322263, "grad_norm": 0.631693422794342, "learning_rate": 9.347433956083955e-05, "loss": 0.7125, "step": 2569 }, { "epoch": 0.4962347943618459, "grad_norm": 0.7155859470367432, "learning_rate": 9.346601372197914e-05, "loss": 0.6156, "step": 2570 }, { "epoch": 0.4964278818304692, "grad_norm": 0.5879980325698853, "learning_rate": 9.345768294647987e-05, "loss": 0.6167, "step": 2571 }, { "epoch": 0.4966209692990925, "grad_norm": 0.6866356730461121, "learning_rate": 9.34493472352879e-05, "loss": 0.7001, "step": 2572 }, { "epoch": 0.4968140567677158, "grad_norm": 0.664303183555603, "learning_rate": 9.344100658935e-05, "loss": 0.6106, "step": 2573 }, { "epoch": 0.49700714423633907, "grad_norm": 0.7540839314460754, "learning_rate": 9.343266100961341e-05, "loss": 0.6638, "step": 2574 }, { "epoch": 0.49720023170496236, "grad_norm": 0.6885585188865662, "learning_rate": 9.3424310497026e-05, "loss": 0.6883, "step": 2575 }, { "epoch": 0.49739331917358565, "grad_norm": 0.6111259460449219, "learning_rate": 9.341595505253615e-05, "loss": 0.6884, "step": 2576 }, { "epoch": 0.49758640664220893, "grad_norm": 0.7375184297561646, "learning_rate": 9.340759467709286e-05, "loss": 0.6774, "step": 2577 }, { "epoch": 0.4977794941108322, "grad_norm": 0.7833519577980042, "learning_rate": 9.339922937164565e-05, "loss": 0.7114, "step": 2578 }, { "epoch": 0.4979725815794555, "grad_norm": 0.6685376763343811, "learning_rate": 9.339085913714458e-05, "loss": 0.6152, "step": 2579 }, { "epoch": 0.4981656690480788, "grad_norm": 0.750262975692749, "learning_rate": 9.338248397454033e-05, "loss": 0.669, "step": 2580 }, { "epoch": 0.4983587565167021, "grad_norm": 1.1754472255706787, "learning_rate": 9.33741038847841e-05, "loss": 0.6769, "step": 2581 }, { "epoch": 0.49855184398532537, "grad_norm": 0.6133560538291931, "learning_rate": 9.336571886882764e-05, "loss": 0.6674, "step": 2582 }, { "epoch": 0.49874493145394866, "grad_norm": 0.5647743940353394, "learning_rate": 9.335732892762328e-05, "loss": 0.6564, "step": 2583 }, { "epoch": 0.49893801892257195, "grad_norm": 0.6797627210617065, "learning_rate": 9.33489340621239e-05, "loss": 0.6583, "step": 2584 }, { "epoch": 0.49913110639119523, "grad_norm": 0.5627490878105164, "learning_rate": 9.334053427328297e-05, "loss": 0.6658, "step": 2585 }, { "epoch": 0.4993241938598185, "grad_norm": 0.5924085974693298, "learning_rate": 9.333212956205446e-05, "loss": 0.6844, "step": 2586 }, { "epoch": 0.4995172813284418, "grad_norm": 0.6298132538795471, "learning_rate": 9.332371992939297e-05, "loss": 0.6829, "step": 2587 }, { "epoch": 0.4997103687970651, "grad_norm": 0.7037111520767212, "learning_rate": 9.331530537625359e-05, "loss": 0.6959, "step": 2588 }, { "epoch": 0.4999034562656884, "grad_norm": 1.3920997381210327, "learning_rate": 9.330688590359203e-05, "loss": 0.6405, "step": 2589 }, { "epoch": 0.5000965437343117, "grad_norm": 0.7824984192848206, "learning_rate": 9.329846151236447e-05, "loss": 0.6158, "step": 2590 }, { "epoch": 0.5002896312029349, "grad_norm": 0.9002266526222229, "learning_rate": 9.329003220352778e-05, "loss": 0.6386, "step": 2591 }, { "epoch": 0.5004827186715582, "grad_norm": 1.1490558385849, "learning_rate": 9.328159797803928e-05, "loss": 0.6525, "step": 2592 }, { "epoch": 0.5006758061401815, "grad_norm": 0.7491048574447632, "learning_rate": 9.327315883685687e-05, "loss": 0.6613, "step": 2593 }, { "epoch": 0.5008688936088048, "grad_norm": 1.0086987018585205, "learning_rate": 9.326471478093906e-05, "loss": 0.5934, "step": 2594 }, { "epoch": 0.501061981077428, "grad_norm": 0.6179349422454834, "learning_rate": 9.325626581124484e-05, "loss": 0.6812, "step": 2595 }, { "epoch": 0.5012550685460514, "grad_norm": 0.5498674511909485, "learning_rate": 9.324781192873384e-05, "loss": 0.6815, "step": 2596 }, { "epoch": 0.5014481560146746, "grad_norm": 0.5807647109031677, "learning_rate": 9.32393531343662e-05, "loss": 0.7559, "step": 2597 }, { "epoch": 0.501641243483298, "grad_norm": 0.4652945399284363, "learning_rate": 9.323088942910258e-05, "loss": 0.6588, "step": 2598 }, { "epoch": 0.5018343309519212, "grad_norm": 0.5767107009887695, "learning_rate": 9.322242081390431e-05, "loss": 0.6938, "step": 2599 }, { "epoch": 0.5020274184205445, "grad_norm": 1.0106242895126343, "learning_rate": 9.321394728973315e-05, "loss": 0.7277, "step": 2600 }, { "epoch": 0.5022205058891678, "grad_norm": 0.6544667482376099, "learning_rate": 9.32054688575515e-05, "loss": 0.682, "step": 2601 }, { "epoch": 0.5024135933577911, "grad_norm": 0.9768699407577515, "learning_rate": 9.319698551832232e-05, "loss": 0.6286, "step": 2602 }, { "epoch": 0.5026066808264144, "grad_norm": 0.7912124395370483, "learning_rate": 9.318849727300907e-05, "loss": 0.6978, "step": 2603 }, { "epoch": 0.5027997682950377, "grad_norm": 0.620358407497406, "learning_rate": 9.318000412257581e-05, "loss": 0.6116, "step": 2604 }, { "epoch": 0.5029928557636609, "grad_norm": 0.5617654919624329, "learning_rate": 9.317150606798716e-05, "loss": 0.6492, "step": 2605 }, { "epoch": 0.5031859432322843, "grad_norm": 1.0602788925170898, "learning_rate": 9.316300311020827e-05, "loss": 0.6817, "step": 2606 }, { "epoch": 0.5033790307009075, "grad_norm": 0.8353971242904663, "learning_rate": 9.315449525020486e-05, "loss": 0.712, "step": 2607 }, { "epoch": 0.5035721181695308, "grad_norm": 1.0521199703216553, "learning_rate": 9.314598248894319e-05, "loss": 0.6889, "step": 2608 }, { "epoch": 0.5037652056381541, "grad_norm": 0.7221295237541199, "learning_rate": 9.313746482739015e-05, "loss": 0.6214, "step": 2609 }, { "epoch": 0.5039582931067774, "grad_norm": 0.7199450135231018, "learning_rate": 9.312894226651307e-05, "loss": 0.6694, "step": 2610 }, { "epoch": 0.5041513805754007, "grad_norm": 0.7322970032691956, "learning_rate": 9.312041480727993e-05, "loss": 0.6615, "step": 2611 }, { "epoch": 0.504344468044024, "grad_norm": 0.9313361644744873, "learning_rate": 9.311188245065923e-05, "loss": 0.6313, "step": 2612 }, { "epoch": 0.5045375555126472, "grad_norm": 1.391343116760254, "learning_rate": 9.310334519762002e-05, "loss": 0.6966, "step": 2613 }, { "epoch": 0.5047306429812705, "grad_norm": 0.8443551659584045, "learning_rate": 9.309480304913191e-05, "loss": 0.6399, "step": 2614 }, { "epoch": 0.5049237304498938, "grad_norm": 1.915328860282898, "learning_rate": 9.308625600616512e-05, "loss": 0.6689, "step": 2615 }, { "epoch": 0.505116817918517, "grad_norm": 1.2603285312652588, "learning_rate": 9.30777040696903e-05, "loss": 0.6991, "step": 2616 }, { "epoch": 0.5053099053871404, "grad_norm": 0.8051640391349792, "learning_rate": 9.306914724067881e-05, "loss": 0.678, "step": 2617 }, { "epoch": 0.5055029928557636, "grad_norm": 0.8488961458206177, "learning_rate": 9.306058552010242e-05, "loss": 0.7209, "step": 2618 }, { "epoch": 0.505696080324387, "grad_norm": 0.5963205695152283, "learning_rate": 9.305201890893359e-05, "loss": 0.5558, "step": 2619 }, { "epoch": 0.5058891677930102, "grad_norm": 0.8218706250190735, "learning_rate": 9.304344740814522e-05, "loss": 0.6496, "step": 2620 }, { "epoch": 0.5060822552616335, "grad_norm": 0.9338238835334778, "learning_rate": 9.303487101871084e-05, "loss": 0.7024, "step": 2621 }, { "epoch": 0.5062753427302568, "grad_norm": 0.7131787538528442, "learning_rate": 9.30262897416045e-05, "loss": 0.6801, "step": 2622 }, { "epoch": 0.5064684301988801, "grad_norm": 0.641059160232544, "learning_rate": 9.301770357780083e-05, "loss": 0.6642, "step": 2623 }, { "epoch": 0.5066615176675033, "grad_norm": 0.6799026727676392, "learning_rate": 9.300911252827498e-05, "loss": 0.6667, "step": 2624 }, { "epoch": 0.5068546051361267, "grad_norm": 0.7753406167030334, "learning_rate": 9.30005165940027e-05, "loss": 0.6132, "step": 2625 }, { "epoch": 0.5070476926047499, "grad_norm": 0.7762496471405029, "learning_rate": 9.299191577596025e-05, "loss": 0.6277, "step": 2626 }, { "epoch": 0.5072407800733733, "grad_norm": 0.6709523797035217, "learning_rate": 9.298331007512447e-05, "loss": 0.7297, "step": 2627 }, { "epoch": 0.5074338675419965, "grad_norm": 0.7206649780273438, "learning_rate": 9.297469949247276e-05, "loss": 0.5529, "step": 2628 }, { "epoch": 0.5076269550106198, "grad_norm": 0.6537874341011047, "learning_rate": 9.296608402898306e-05, "loss": 0.6416, "step": 2629 }, { "epoch": 0.5078200424792431, "grad_norm": 1.0069931745529175, "learning_rate": 9.295746368563386e-05, "loss": 0.7007, "step": 2630 }, { "epoch": 0.5080131299478664, "grad_norm": 0.6265478730201721, "learning_rate": 9.294883846340422e-05, "loss": 0.687, "step": 2631 }, { "epoch": 0.5082062174164896, "grad_norm": 0.8440658450126648, "learning_rate": 9.294020836327374e-05, "loss": 0.6852, "step": 2632 }, { "epoch": 0.508399304885113, "grad_norm": 1.0306668281555176, "learning_rate": 9.29315733862226e-05, "loss": 0.6604, "step": 2633 }, { "epoch": 0.5085923923537362, "grad_norm": 1.1979292631149292, "learning_rate": 9.292293353323149e-05, "loss": 0.6101, "step": 2634 }, { "epoch": 0.5087854798223596, "grad_norm": 0.9662997126579285, "learning_rate": 9.291428880528168e-05, "loss": 0.6425, "step": 2635 }, { "epoch": 0.5089785672909828, "grad_norm": 0.829404354095459, "learning_rate": 9.290563920335502e-05, "loss": 0.7061, "step": 2636 }, { "epoch": 0.5091716547596061, "grad_norm": 0.9506402015686035, "learning_rate": 9.289698472843387e-05, "loss": 0.6812, "step": 2637 }, { "epoch": 0.5093647422282294, "grad_norm": 1.0762405395507812, "learning_rate": 9.288832538150114e-05, "loss": 0.6579, "step": 2638 }, { "epoch": 0.5095578296968527, "grad_norm": 0.5828184485435486, "learning_rate": 9.287966116354034e-05, "loss": 0.6561, "step": 2639 }, { "epoch": 0.5097509171654759, "grad_norm": 1.1832164525985718, "learning_rate": 9.287099207553549e-05, "loss": 0.7219, "step": 2640 }, { "epoch": 0.5099440046340993, "grad_norm": 2.2118701934814453, "learning_rate": 9.286231811847119e-05, "loss": 0.6086, "step": 2641 }, { "epoch": 0.5101370921027225, "grad_norm": 0.6190136671066284, "learning_rate": 9.285363929333257e-05, "loss": 0.7144, "step": 2642 }, { "epoch": 0.5103301795713459, "grad_norm": 0.7709345817565918, "learning_rate": 9.284495560110535e-05, "loss": 0.6364, "step": 2643 }, { "epoch": 0.5105232670399691, "grad_norm": 0.6935403347015381, "learning_rate": 9.283626704277574e-05, "loss": 0.6559, "step": 2644 }, { "epoch": 0.5107163545085924, "grad_norm": 0.6570299863815308, "learning_rate": 9.282757361933057e-05, "loss": 0.6652, "step": 2645 }, { "epoch": 0.5109094419772157, "grad_norm": 0.6340612769126892, "learning_rate": 9.281887533175717e-05, "loss": 0.6563, "step": 2646 }, { "epoch": 0.511102529445839, "grad_norm": 0.697006106376648, "learning_rate": 9.281017218104346e-05, "loss": 0.6197, "step": 2647 }, { "epoch": 0.5112956169144622, "grad_norm": 0.6121150851249695, "learning_rate": 9.28014641681779e-05, "loss": 0.7156, "step": 2648 }, { "epoch": 0.5114887043830856, "grad_norm": 0.7325787544250488, "learning_rate": 9.27927512941495e-05, "loss": 0.7265, "step": 2649 }, { "epoch": 0.5116817918517088, "grad_norm": 1.0360033512115479, "learning_rate": 9.27840335599478e-05, "loss": 0.6315, "step": 2650 }, { "epoch": 0.5118748793203322, "grad_norm": 0.5835889577865601, "learning_rate": 9.277531096656293e-05, "loss": 0.6483, "step": 2651 }, { "epoch": 0.5120679667889554, "grad_norm": 0.6530457139015198, "learning_rate": 9.276658351498557e-05, "loss": 0.6679, "step": 2652 }, { "epoch": 0.5122610542575787, "grad_norm": 0.49845027923583984, "learning_rate": 9.275785120620693e-05, "loss": 0.6475, "step": 2653 }, { "epoch": 0.512454141726202, "grad_norm": 0.5994361639022827, "learning_rate": 9.274911404121876e-05, "loss": 0.6503, "step": 2654 }, { "epoch": 0.5126472291948253, "grad_norm": 0.478980153799057, "learning_rate": 9.27403720210134e-05, "loss": 0.6451, "step": 2655 }, { "epoch": 0.5128403166634485, "grad_norm": 0.7148911356925964, "learning_rate": 9.273162514658371e-05, "loss": 0.6524, "step": 2656 }, { "epoch": 0.5130334041320719, "grad_norm": 0.6740654110908508, "learning_rate": 9.272287341892313e-05, "loss": 0.7429, "step": 2657 }, { "epoch": 0.5132264916006951, "grad_norm": 0.554735541343689, "learning_rate": 9.271411683902562e-05, "loss": 0.687, "step": 2658 }, { "epoch": 0.5134195790693185, "grad_norm": 0.6614316701889038, "learning_rate": 9.27053554078857e-05, "loss": 0.6505, "step": 2659 }, { "epoch": 0.5136126665379417, "grad_norm": 0.6300486326217651, "learning_rate": 9.269658912649846e-05, "loss": 0.6789, "step": 2660 }, { "epoch": 0.5138057540065649, "grad_norm": 0.5594739317893982, "learning_rate": 9.268781799585953e-05, "loss": 0.647, "step": 2661 }, { "epoch": 0.5139988414751883, "grad_norm": 2.58897066116333, "learning_rate": 9.267904201696509e-05, "loss": 0.6873, "step": 2662 }, { "epoch": 0.5141919289438115, "grad_norm": 0.7583262324333191, "learning_rate": 9.267026119081186e-05, "loss": 0.6744, "step": 2663 }, { "epoch": 0.5143850164124348, "grad_norm": 0.658414363861084, "learning_rate": 9.26614755183971e-05, "loss": 0.6303, "step": 2664 }, { "epoch": 0.5145781038810581, "grad_norm": 0.9228017926216125, "learning_rate": 9.265268500071868e-05, "loss": 0.7072, "step": 2665 }, { "epoch": 0.5147711913496814, "grad_norm": 0.585919201374054, "learning_rate": 9.264388963877497e-05, "loss": 0.6706, "step": 2666 }, { "epoch": 0.5149642788183046, "grad_norm": 0.9343395233154297, "learning_rate": 9.263508943356487e-05, "loss": 0.6401, "step": 2667 }, { "epoch": 0.515157366286928, "grad_norm": 0.8579326272010803, "learning_rate": 9.26262843860879e-05, "loss": 0.7248, "step": 2668 }, { "epoch": 0.5153504537555512, "grad_norm": 0.842863142490387, "learning_rate": 9.261747449734407e-05, "loss": 0.6547, "step": 2669 }, { "epoch": 0.5155435412241746, "grad_norm": 0.8672675490379333, "learning_rate": 9.260865976833397e-05, "loss": 0.6459, "step": 2670 }, { "epoch": 0.5157366286927978, "grad_norm": 0.5813927054405212, "learning_rate": 9.259984020005871e-05, "loss": 0.6238, "step": 2671 }, { "epoch": 0.5159297161614211, "grad_norm": 0.47537487745285034, "learning_rate": 9.259101579351998e-05, "loss": 0.583, "step": 2672 }, { "epoch": 0.5161228036300444, "grad_norm": 0.9888963103294373, "learning_rate": 9.258218654972002e-05, "loss": 0.6894, "step": 2673 }, { "epoch": 0.5163158910986677, "grad_norm": 0.8524836897850037, "learning_rate": 9.257335246966161e-05, "loss": 0.7036, "step": 2674 }, { "epoch": 0.5165089785672909, "grad_norm": 0.70331209897995, "learning_rate": 9.256451355434806e-05, "loss": 0.6219, "step": 2675 }, { "epoch": 0.5167020660359143, "grad_norm": 0.7211571335792542, "learning_rate": 9.255566980478326e-05, "loss": 0.6692, "step": 2676 }, { "epoch": 0.5168951535045375, "grad_norm": 1.2142720222473145, "learning_rate": 9.254682122197164e-05, "loss": 0.6141, "step": 2677 }, { "epoch": 0.5170882409731609, "grad_norm": 0.7519416213035583, "learning_rate": 9.253796780691814e-05, "loss": 0.6742, "step": 2678 }, { "epoch": 0.5172813284417841, "grad_norm": 1.1664642095565796, "learning_rate": 9.252910956062835e-05, "loss": 0.6144, "step": 2679 }, { "epoch": 0.5174744159104074, "grad_norm": 1.1416723728179932, "learning_rate": 9.252024648410827e-05, "loss": 0.6175, "step": 2680 }, { "epoch": 0.5176675033790307, "grad_norm": 0.6508283615112305, "learning_rate": 9.251137857836458e-05, "loss": 0.6648, "step": 2681 }, { "epoch": 0.517860590847654, "grad_norm": 0.7738400101661682, "learning_rate": 9.250250584440441e-05, "loss": 0.6425, "step": 2682 }, { "epoch": 0.5180536783162772, "grad_norm": 0.6099202632904053, "learning_rate": 9.249362828323549e-05, "loss": 0.5963, "step": 2683 }, { "epoch": 0.5182467657849006, "grad_norm": 0.6054527163505554, "learning_rate": 9.248474589586609e-05, "loss": 0.6485, "step": 2684 }, { "epoch": 0.5184398532535238, "grad_norm": 1.0398955345153809, "learning_rate": 9.247585868330503e-05, "loss": 0.6956, "step": 2685 }, { "epoch": 0.5186329407221472, "grad_norm": 0.4704466462135315, "learning_rate": 9.246696664656167e-05, "loss": 0.6719, "step": 2686 }, { "epoch": 0.5188260281907704, "grad_norm": 0.8277133703231812, "learning_rate": 9.245806978664589e-05, "loss": 0.6403, "step": 2687 }, { "epoch": 0.5190191156593937, "grad_norm": 0.9141622185707092, "learning_rate": 9.244916810456821e-05, "loss": 0.684, "step": 2688 }, { "epoch": 0.519212203128017, "grad_norm": 0.6636105179786682, "learning_rate": 9.244026160133958e-05, "loss": 0.642, "step": 2689 }, { "epoch": 0.5194052905966403, "grad_norm": 0.7234370708465576, "learning_rate": 9.243135027797157e-05, "loss": 0.6743, "step": 2690 }, { "epoch": 0.5195983780652635, "grad_norm": 0.8129099011421204, "learning_rate": 9.242243413547628e-05, "loss": 0.7092, "step": 2691 }, { "epoch": 0.5197914655338869, "grad_norm": 0.9910418391227722, "learning_rate": 9.241351317486637e-05, "loss": 0.688, "step": 2692 }, { "epoch": 0.5199845530025101, "grad_norm": 0.720128059387207, "learning_rate": 9.240458739715502e-05, "loss": 0.6761, "step": 2693 }, { "epoch": 0.5201776404711335, "grad_norm": 0.725890040397644, "learning_rate": 9.239565680335598e-05, "loss": 0.5922, "step": 2694 }, { "epoch": 0.5203707279397567, "grad_norm": 0.7883195281028748, "learning_rate": 9.238672139448354e-05, "loss": 0.6208, "step": 2695 }, { "epoch": 0.52056381540838, "grad_norm": 0.5453859567642212, "learning_rate": 9.237778117155256e-05, "loss": 0.6294, "step": 2696 }, { "epoch": 0.5207569028770033, "grad_norm": 0.6664617657661438, "learning_rate": 9.236883613557836e-05, "loss": 0.6286, "step": 2697 }, { "epoch": 0.5209499903456266, "grad_norm": 0.5052761435508728, "learning_rate": 9.235988628757693e-05, "loss": 0.6842, "step": 2698 }, { "epoch": 0.5211430778142498, "grad_norm": 0.6877129673957825, "learning_rate": 9.235093162856472e-05, "loss": 0.7335, "step": 2699 }, { "epoch": 0.5213361652828732, "grad_norm": 0.9097678661346436, "learning_rate": 9.234197215955875e-05, "loss": 0.7307, "step": 2700 }, { "epoch": 0.5215292527514964, "grad_norm": 0.5959782004356384, "learning_rate": 9.233300788157659e-05, "loss": 0.6958, "step": 2701 }, { "epoch": 0.5217223402201198, "grad_norm": 0.6034286618232727, "learning_rate": 9.232403879563639e-05, "loss": 0.6835, "step": 2702 }, { "epoch": 0.521915427688743, "grad_norm": 1.0461238622665405, "learning_rate": 9.231506490275674e-05, "loss": 0.7046, "step": 2703 }, { "epoch": 0.5221085151573663, "grad_norm": 0.6698222756385803, "learning_rate": 9.23060862039569e-05, "loss": 0.694, "step": 2704 }, { "epoch": 0.5223016026259896, "grad_norm": 0.6769505739212036, "learning_rate": 9.229710270025664e-05, "loss": 0.6791, "step": 2705 }, { "epoch": 0.5224946900946129, "grad_norm": 0.6612290143966675, "learning_rate": 9.228811439267622e-05, "loss": 0.7026, "step": 2706 }, { "epoch": 0.5226877775632361, "grad_norm": 1.285544753074646, "learning_rate": 9.227912128223648e-05, "loss": 0.6622, "step": 2707 }, { "epoch": 0.5228808650318595, "grad_norm": 0.6106311082839966, "learning_rate": 9.227012336995882e-05, "loss": 0.7088, "step": 2708 }, { "epoch": 0.5230739525004827, "grad_norm": 4.462644577026367, "learning_rate": 9.226112065686519e-05, "loss": 0.6967, "step": 2709 }, { "epoch": 0.523267039969106, "grad_norm": 0.8190000653266907, "learning_rate": 9.225211314397806e-05, "loss": 0.7299, "step": 2710 }, { "epoch": 0.5234601274377293, "grad_norm": 1.1024448871612549, "learning_rate": 9.224310083232044e-05, "loss": 0.7215, "step": 2711 }, { "epoch": 0.5236532149063525, "grad_norm": 0.6415221095085144, "learning_rate": 9.223408372291593e-05, "loss": 0.6512, "step": 2712 }, { "epoch": 0.5238463023749759, "grad_norm": 0.736937940120697, "learning_rate": 9.222506181678863e-05, "loss": 0.7064, "step": 2713 }, { "epoch": 0.5240393898435991, "grad_norm": 1.0226860046386719, "learning_rate": 9.22160351149632e-05, "loss": 0.6314, "step": 2714 }, { "epoch": 0.5242324773122224, "grad_norm": 0.630765438079834, "learning_rate": 9.220700361846484e-05, "loss": 0.6165, "step": 2715 }, { "epoch": 0.5244255647808457, "grad_norm": 0.9164434671401978, "learning_rate": 9.219796732831932e-05, "loss": 0.6889, "step": 2716 }, { "epoch": 0.524618652249469, "grad_norm": 0.7600204944610596, "learning_rate": 9.218892624555289e-05, "loss": 0.6637, "step": 2717 }, { "epoch": 0.5248117397180923, "grad_norm": 1.1205761432647705, "learning_rate": 9.217988037119242e-05, "loss": 0.6241, "step": 2718 }, { "epoch": 0.5250048271867156, "grad_norm": 1.0928964614868164, "learning_rate": 9.217082970626532e-05, "loss": 0.6389, "step": 2719 }, { "epoch": 0.5251979146553388, "grad_norm": 0.5925130248069763, "learning_rate": 9.216177425179946e-05, "loss": 0.6523, "step": 2720 }, { "epoch": 0.5253910021239622, "grad_norm": 0.7235034704208374, "learning_rate": 9.215271400882335e-05, "loss": 0.6032, "step": 2721 }, { "epoch": 0.5255840895925854, "grad_norm": 0.6221244931221008, "learning_rate": 9.214364897836598e-05, "loss": 0.6679, "step": 2722 }, { "epoch": 0.5257771770612087, "grad_norm": 0.9269753098487854, "learning_rate": 9.213457916145692e-05, "loss": 0.6777, "step": 2723 }, { "epoch": 0.525970264529832, "grad_norm": 0.806962788105011, "learning_rate": 9.212550455912628e-05, "loss": 0.6507, "step": 2724 }, { "epoch": 0.5261633519984553, "grad_norm": 0.948286235332489, "learning_rate": 9.211642517240469e-05, "loss": 0.6669, "step": 2725 }, { "epoch": 0.5263564394670786, "grad_norm": 1.0166380405426025, "learning_rate": 9.210734100232335e-05, "loss": 0.6955, "step": 2726 }, { "epoch": 0.5265495269357019, "grad_norm": 0.8954406380653381, "learning_rate": 9.209825204991395e-05, "loss": 0.6124, "step": 2727 }, { "epoch": 0.5267426144043251, "grad_norm": 0.8412950038909912, "learning_rate": 9.208915831620884e-05, "loss": 0.6818, "step": 2728 }, { "epoch": 0.5269357018729485, "grad_norm": 0.8627844452857971, "learning_rate": 9.208005980224078e-05, "loss": 0.5938, "step": 2729 }, { "epoch": 0.5271287893415717, "grad_norm": 1.7354580163955688, "learning_rate": 9.207095650904316e-05, "loss": 0.6721, "step": 2730 }, { "epoch": 0.527321876810195, "grad_norm": 0.7966617941856384, "learning_rate": 9.206184843764987e-05, "loss": 0.6742, "step": 2731 }, { "epoch": 0.5275149642788183, "grad_norm": 0.8404974937438965, "learning_rate": 9.205273558909535e-05, "loss": 0.5887, "step": 2732 }, { "epoch": 0.5277080517474416, "grad_norm": 0.9202344417572021, "learning_rate": 9.20436179644146e-05, "loss": 0.6006, "step": 2733 }, { "epoch": 0.5279011392160649, "grad_norm": 0.5754425525665283, "learning_rate": 9.203449556464315e-05, "loss": 0.6604, "step": 2734 }, { "epoch": 0.5280942266846882, "grad_norm": 0.5959469079971313, "learning_rate": 9.202536839081708e-05, "loss": 0.5681, "step": 2735 }, { "epoch": 0.5282873141533114, "grad_norm": 1.4507298469543457, "learning_rate": 9.2016236443973e-05, "loss": 0.674, "step": 2736 }, { "epoch": 0.5284804016219348, "grad_norm": 0.7462664842605591, "learning_rate": 9.200709972514805e-05, "loss": 0.623, "step": 2737 }, { "epoch": 0.528673489090558, "grad_norm": 0.8781020641326904, "learning_rate": 9.199795823537995e-05, "loss": 0.7127, "step": 2738 }, { "epoch": 0.5288665765591813, "grad_norm": 1.0332566499710083, "learning_rate": 9.198881197570694e-05, "loss": 0.6172, "step": 2739 }, { "epoch": 0.5290596640278046, "grad_norm": 1.3209236860275269, "learning_rate": 9.197966094716781e-05, "loss": 0.6513, "step": 2740 }, { "epoch": 0.5292527514964279, "grad_norm": 1.0098249912261963, "learning_rate": 9.197050515080187e-05, "loss": 0.6872, "step": 2741 }, { "epoch": 0.5294458389650512, "grad_norm": 0.9086322784423828, "learning_rate": 9.1961344587649e-05, "loss": 0.6543, "step": 2742 }, { "epoch": 0.5296389264336745, "grad_norm": 0.7159903645515442, "learning_rate": 9.19521792587496e-05, "loss": 0.7025, "step": 2743 }, { "epoch": 0.5298320139022977, "grad_norm": 0.8308739066123962, "learning_rate": 9.194300916514462e-05, "loss": 0.7098, "step": 2744 }, { "epoch": 0.5300251013709211, "grad_norm": 1.0419752597808838, "learning_rate": 9.193383430787557e-05, "loss": 0.6214, "step": 2745 }, { "epoch": 0.5302181888395443, "grad_norm": 0.7980696558952332, "learning_rate": 9.192465468798443e-05, "loss": 0.6974, "step": 2746 }, { "epoch": 0.5304112763081676, "grad_norm": 1.542602300643921, "learning_rate": 9.191547030651383e-05, "loss": 0.6665, "step": 2747 }, { "epoch": 0.5306043637767909, "grad_norm": 0.5791041254997253, "learning_rate": 9.190628116450686e-05, "loss": 0.6259, "step": 2748 }, { "epoch": 0.5307974512454142, "grad_norm": 1.3619190454483032, "learning_rate": 9.189708726300717e-05, "loss": 0.6177, "step": 2749 }, { "epoch": 0.5309905387140375, "grad_norm": 0.6980944871902466, "learning_rate": 9.188788860305894e-05, "loss": 0.5999, "step": 2750 }, { "epoch": 0.5311836261826608, "grad_norm": 0.7834179997444153, "learning_rate": 9.187868518570695e-05, "loss": 0.6792, "step": 2751 }, { "epoch": 0.531376713651284, "grad_norm": 0.648379921913147, "learning_rate": 9.186947701199641e-05, "loss": 0.6767, "step": 2752 }, { "epoch": 0.5315698011199074, "grad_norm": 0.7790149450302124, "learning_rate": 9.186026408297321e-05, "loss": 0.7089, "step": 2753 }, { "epoch": 0.5317628885885306, "grad_norm": 0.7176534533500671, "learning_rate": 9.185104639968365e-05, "loss": 0.6518, "step": 2754 }, { "epoch": 0.531955976057154, "grad_norm": 0.8995149731636047, "learning_rate": 9.184182396317466e-05, "loss": 0.6007, "step": 2755 }, { "epoch": 0.5321490635257772, "grad_norm": 0.991091787815094, "learning_rate": 9.183259677449364e-05, "loss": 0.638, "step": 2756 }, { "epoch": 0.5323421509944004, "grad_norm": 0.76715087890625, "learning_rate": 9.18233648346886e-05, "loss": 0.6614, "step": 2757 }, { "epoch": 0.5325352384630238, "grad_norm": 3.4839401245117188, "learning_rate": 9.181412814480804e-05, "loss": 0.6573, "step": 2758 }, { "epoch": 0.532728325931647, "grad_norm": 0.8271551728248596, "learning_rate": 9.1804886705901e-05, "loss": 0.6032, "step": 2759 }, { "epoch": 0.5329214134002703, "grad_norm": 0.7209429144859314, "learning_rate": 9.179564051901708e-05, "loss": 0.6738, "step": 2760 }, { "epoch": 0.5331145008688936, "grad_norm": 0.8350620269775391, "learning_rate": 9.178638958520644e-05, "loss": 0.6634, "step": 2761 }, { "epoch": 0.5333075883375169, "grad_norm": 0.8793646693229675, "learning_rate": 9.177713390551971e-05, "loss": 0.6623, "step": 2762 }, { "epoch": 0.5335006758061401, "grad_norm": 0.7139442563056946, "learning_rate": 9.176787348100815e-05, "loss": 0.6198, "step": 2763 }, { "epoch": 0.5336937632747635, "grad_norm": 1.2322332859039307, "learning_rate": 9.175860831272347e-05, "loss": 0.6339, "step": 2764 }, { "epoch": 0.5338868507433867, "grad_norm": 0.7989500164985657, "learning_rate": 9.174933840171798e-05, "loss": 0.63, "step": 2765 }, { "epoch": 0.53407993821201, "grad_norm": 1.3209025859832764, "learning_rate": 9.174006374904447e-05, "loss": 0.7017, "step": 2766 }, { "epoch": 0.5342730256806333, "grad_norm": 1.866559624671936, "learning_rate": 9.173078435575635e-05, "loss": 0.6652, "step": 2767 }, { "epoch": 0.5344661131492566, "grad_norm": 3.7603752613067627, "learning_rate": 9.172150022290751e-05, "loss": 0.7068, "step": 2768 }, { "epoch": 0.5346592006178799, "grad_norm": 0.9640856385231018, "learning_rate": 9.17122113515524e-05, "loss": 0.6101, "step": 2769 }, { "epoch": 0.5348522880865032, "grad_norm": 0.6741225719451904, "learning_rate": 9.170291774274597e-05, "loss": 0.7279, "step": 2770 }, { "epoch": 0.5350453755551264, "grad_norm": 2.3559608459472656, "learning_rate": 9.169361939754377e-05, "loss": 0.6139, "step": 2771 }, { "epoch": 0.5352384630237498, "grad_norm": 2.536846160888672, "learning_rate": 9.168431631700185e-05, "loss": 0.6494, "step": 2772 }, { "epoch": 0.535431550492373, "grad_norm": 1.062227487564087, "learning_rate": 9.16750085021768e-05, "loss": 0.5863, "step": 2773 }, { "epoch": 0.5356246379609964, "grad_norm": 0.8144372701644897, "learning_rate": 9.166569595412575e-05, "loss": 0.597, "step": 2774 }, { "epoch": 0.5358177254296196, "grad_norm": 2.551976203918457, "learning_rate": 9.165637867390638e-05, "loss": 0.6734, "step": 2775 }, { "epoch": 0.5360108128982429, "grad_norm": 3.936314821243286, "learning_rate": 9.164705666257688e-05, "loss": 0.6672, "step": 2776 }, { "epoch": 0.5362039003668662, "grad_norm": 3.961988925933838, "learning_rate": 9.163772992119601e-05, "loss": 0.6602, "step": 2777 }, { "epoch": 0.5363969878354895, "grad_norm": 0.8703494668006897, "learning_rate": 9.162839845082305e-05, "loss": 0.7002, "step": 2778 }, { "epoch": 0.5365900753041127, "grad_norm": 0.7770414352416992, "learning_rate": 9.16190622525178e-05, "loss": 0.6843, "step": 2779 }, { "epoch": 0.5367831627727361, "grad_norm": 0.8397629857063293, "learning_rate": 9.160972132734064e-05, "loss": 0.6651, "step": 2780 }, { "epoch": 0.5369762502413593, "grad_norm": 1.0918587446212769, "learning_rate": 9.160037567635246e-05, "loss": 0.5995, "step": 2781 }, { "epoch": 0.5371693377099827, "grad_norm": 4.400223255157471, "learning_rate": 9.159102530061467e-05, "loss": 0.6348, "step": 2782 }, { "epoch": 0.5373624251786059, "grad_norm": 0.700005292892456, "learning_rate": 9.158167020118927e-05, "loss": 0.624, "step": 2783 }, { "epoch": 0.5375555126472292, "grad_norm": 1.2785675525665283, "learning_rate": 9.157231037913871e-05, "loss": 0.6911, "step": 2784 }, { "epoch": 0.5377486001158525, "grad_norm": 1.054017424583435, "learning_rate": 9.156294583552609e-05, "loss": 0.6615, "step": 2785 }, { "epoch": 0.5379416875844758, "grad_norm": 0.614943265914917, "learning_rate": 9.155357657141493e-05, "loss": 0.6575, "step": 2786 }, { "epoch": 0.538134775053099, "grad_norm": 0.7167116403579712, "learning_rate": 9.154420258786938e-05, "loss": 0.644, "step": 2787 }, { "epoch": 0.5383278625217224, "grad_norm": 1.734275221824646, "learning_rate": 9.153482388595407e-05, "loss": 0.646, "step": 2788 }, { "epoch": 0.5385209499903456, "grad_norm": 1.4503546953201294, "learning_rate": 9.152544046673419e-05, "loss": 0.658, "step": 2789 }, { "epoch": 0.538714037458969, "grad_norm": 2.125028610229492, "learning_rate": 9.151605233127546e-05, "loss": 0.6698, "step": 2790 }, { "epoch": 0.5389071249275922, "grad_norm": 1.3326539993286133, "learning_rate": 9.150665948064411e-05, "loss": 0.6088, "step": 2791 }, { "epoch": 0.5391002123962155, "grad_norm": 1.5974715948104858, "learning_rate": 9.149726191590697e-05, "loss": 0.6128, "step": 2792 }, { "epoch": 0.5392932998648388, "grad_norm": 1.7639546394348145, "learning_rate": 9.148785963813135e-05, "loss": 0.7018, "step": 2793 }, { "epoch": 0.5394863873334621, "grad_norm": 0.7082322835922241, "learning_rate": 9.147845264838509e-05, "loss": 0.6272, "step": 2794 }, { "epoch": 0.5396794748020853, "grad_norm": 0.839641809463501, "learning_rate": 9.14690409477366e-05, "loss": 0.695, "step": 2795 }, { "epoch": 0.5398725622707087, "grad_norm": 0.7219635248184204, "learning_rate": 9.145962453725483e-05, "loss": 0.6661, "step": 2796 }, { "epoch": 0.5400656497393319, "grad_norm": 1.1352496147155762, "learning_rate": 9.145020341800923e-05, "loss": 0.703, "step": 2797 }, { "epoch": 0.5402587372079553, "grad_norm": 0.6716598868370056, "learning_rate": 9.144077759106979e-05, "loss": 0.6377, "step": 2798 }, { "epoch": 0.5404518246765785, "grad_norm": 0.6377519965171814, "learning_rate": 9.143134705750707e-05, "loss": 0.6826, "step": 2799 }, { "epoch": 0.5406449121452018, "grad_norm": 1.1485097408294678, "learning_rate": 9.14219118183921e-05, "loss": 0.6387, "step": 2800 }, { "epoch": 0.5408379996138251, "grad_norm": 0.9489761590957642, "learning_rate": 9.141247187479652e-05, "loss": 0.7084, "step": 2801 }, { "epoch": 0.5410310870824484, "grad_norm": 0.7788481712341309, "learning_rate": 9.140302722779245e-05, "loss": 0.6922, "step": 2802 }, { "epoch": 0.5412241745510716, "grad_norm": 0.864056408405304, "learning_rate": 9.13935778784526e-05, "loss": 0.6794, "step": 2803 }, { "epoch": 0.5414172620196949, "grad_norm": 0.9927869439125061, "learning_rate": 9.138412382785012e-05, "loss": 0.6469, "step": 2804 }, { "epoch": 0.5416103494883182, "grad_norm": 0.7989642024040222, "learning_rate": 9.137466507705878e-05, "loss": 0.7055, "step": 2805 }, { "epoch": 0.5418034369569414, "grad_norm": 1.3409204483032227, "learning_rate": 9.136520162715287e-05, "loss": 0.6219, "step": 2806 }, { "epoch": 0.5419965244255648, "grad_norm": 0.9876454472541809, "learning_rate": 9.135573347920716e-05, "loss": 0.5914, "step": 2807 }, { "epoch": 0.542189611894188, "grad_norm": 1.0046910047531128, "learning_rate": 9.134626063429704e-05, "loss": 0.6748, "step": 2808 }, { "epoch": 0.5423826993628114, "grad_norm": 0.7351843118667603, "learning_rate": 9.133678309349834e-05, "loss": 0.675, "step": 2809 }, { "epoch": 0.5425757868314346, "grad_norm": 0.8256557583808899, "learning_rate": 9.132730085788749e-05, "loss": 0.633, "step": 2810 }, { "epoch": 0.5427688743000579, "grad_norm": 0.7309636473655701, "learning_rate": 9.131781392854143e-05, "loss": 0.6149, "step": 2811 }, { "epoch": 0.5429619617686812, "grad_norm": 0.7266411781311035, "learning_rate": 9.130832230653763e-05, "loss": 0.6421, "step": 2812 }, { "epoch": 0.5431550492373045, "grad_norm": 0.9593621492385864, "learning_rate": 9.129882599295412e-05, "loss": 0.6884, "step": 2813 }, { "epoch": 0.5433481367059277, "grad_norm": 0.7657887935638428, "learning_rate": 9.128932498886939e-05, "loss": 0.6453, "step": 2814 }, { "epoch": 0.5435412241745511, "grad_norm": 2.0771291255950928, "learning_rate": 9.127981929536258e-05, "loss": 0.641, "step": 2815 }, { "epoch": 0.5437343116431743, "grad_norm": 0.6171072721481323, "learning_rate": 9.127030891351324e-05, "loss": 0.6376, "step": 2816 }, { "epoch": 0.5439273991117977, "grad_norm": 1.1532974243164062, "learning_rate": 9.126079384440155e-05, "loss": 0.6845, "step": 2817 }, { "epoch": 0.5441204865804209, "grad_norm": 1.3613345623016357, "learning_rate": 9.125127408910816e-05, "loss": 0.7279, "step": 2818 }, { "epoch": 0.5443135740490442, "grad_norm": 0.5918445587158203, "learning_rate": 9.124174964871428e-05, "loss": 0.6934, "step": 2819 }, { "epoch": 0.5445066615176675, "grad_norm": 0.5321668386459351, "learning_rate": 9.123222052430163e-05, "loss": 0.6225, "step": 2820 }, { "epoch": 0.5446997489862908, "grad_norm": 3.2418746948242188, "learning_rate": 9.122268671695248e-05, "loss": 0.6984, "step": 2821 }, { "epoch": 0.544892836454914, "grad_norm": 0.7318786978721619, "learning_rate": 9.121314822774965e-05, "loss": 0.5935, "step": 2822 }, { "epoch": 0.5450859239235374, "grad_norm": 0.5978460311889648, "learning_rate": 9.120360505777645e-05, "loss": 0.73, "step": 2823 }, { "epoch": 0.5452790113921606, "grad_norm": 0.8223179578781128, "learning_rate": 9.119405720811678e-05, "loss": 0.6423, "step": 2824 }, { "epoch": 0.545472098860784, "grad_norm": 0.5715242028236389, "learning_rate": 9.118450467985498e-05, "loss": 0.6192, "step": 2825 }, { "epoch": 0.5456651863294072, "grad_norm": 0.6112996339797974, "learning_rate": 9.1174947474076e-05, "loss": 0.7032, "step": 2826 }, { "epoch": 0.5458582737980305, "grad_norm": 0.700911819934845, "learning_rate": 9.11653855918653e-05, "loss": 0.6695, "step": 2827 }, { "epoch": 0.5460513612666538, "grad_norm": 1.9201222658157349, "learning_rate": 9.115581903430887e-05, "loss": 0.6455, "step": 2828 }, { "epoch": 0.5462444487352771, "grad_norm": 0.7693322896957397, "learning_rate": 9.114624780249322e-05, "loss": 0.6316, "step": 2829 }, { "epoch": 0.5464375362039003, "grad_norm": 0.4754278063774109, "learning_rate": 9.11366718975054e-05, "loss": 0.6172, "step": 2830 }, { "epoch": 0.5466306236725237, "grad_norm": 0.64048832654953, "learning_rate": 9.1127091320433e-05, "loss": 0.6289, "step": 2831 }, { "epoch": 0.5468237111411469, "grad_norm": 0.657078742980957, "learning_rate": 9.111750607236411e-05, "loss": 0.676, "step": 2832 }, { "epoch": 0.5470167986097703, "grad_norm": 0.7248742580413818, "learning_rate": 9.11079161543874e-05, "loss": 0.7218, "step": 2833 }, { "epoch": 0.5472098860783935, "grad_norm": 0.5507818460464478, "learning_rate": 9.109832156759204e-05, "loss": 0.6948, "step": 2834 }, { "epoch": 0.5474029735470168, "grad_norm": 0.727764904499054, "learning_rate": 9.108872231306771e-05, "loss": 0.663, "step": 2835 }, { "epoch": 0.5475960610156401, "grad_norm": 0.918867290019989, "learning_rate": 9.107911839190464e-05, "loss": 0.6463, "step": 2836 }, { "epoch": 0.5477891484842634, "grad_norm": 1.0595858097076416, "learning_rate": 9.106950980519362e-05, "loss": 0.6632, "step": 2837 }, { "epoch": 0.5479822359528866, "grad_norm": 0.7651029825210571, "learning_rate": 9.105989655402591e-05, "loss": 0.6604, "step": 2838 }, { "epoch": 0.54817532342151, "grad_norm": 0.5146529674530029, "learning_rate": 9.105027863949339e-05, "loss": 0.6526, "step": 2839 }, { "epoch": 0.5483684108901332, "grad_norm": 0.9783151745796204, "learning_rate": 9.104065606268833e-05, "loss": 0.629, "step": 2840 }, { "epoch": 0.5485614983587566, "grad_norm": 0.8490777015686035, "learning_rate": 9.103102882470367e-05, "loss": 0.7193, "step": 2841 }, { "epoch": 0.5487545858273798, "grad_norm": 0.5222064256668091, "learning_rate": 9.102139692663278e-05, "loss": 0.724, "step": 2842 }, { "epoch": 0.5489476732960031, "grad_norm": 0.5797905921936035, "learning_rate": 9.101176036956965e-05, "loss": 0.6725, "step": 2843 }, { "epoch": 0.5491407607646264, "grad_norm": 0.8067662715911865, "learning_rate": 9.10021191546087e-05, "loss": 0.6381, "step": 2844 }, { "epoch": 0.5493338482332497, "grad_norm": 0.7005948424339294, "learning_rate": 9.099247328284497e-05, "loss": 0.6581, "step": 2845 }, { "epoch": 0.5495269357018729, "grad_norm": 1.5461270809173584, "learning_rate": 9.098282275537395e-05, "loss": 0.6464, "step": 2846 }, { "epoch": 0.5497200231704963, "grad_norm": 0.8157663941383362, "learning_rate": 9.097316757329173e-05, "loss": 0.6756, "step": 2847 }, { "epoch": 0.5499131106391195, "grad_norm": 0.560577392578125, "learning_rate": 9.096350773769485e-05, "loss": 0.655, "step": 2848 }, { "epoch": 0.5501061981077429, "grad_norm": 1.2338660955429077, "learning_rate": 9.095384324968047e-05, "loss": 0.641, "step": 2849 }, { "epoch": 0.5502992855763661, "grad_norm": 0.5991606116294861, "learning_rate": 9.09441741103462e-05, "loss": 0.5931, "step": 2850 }, { "epoch": 0.5504923730449893, "grad_norm": 0.5967409014701843, "learning_rate": 9.093450032079023e-05, "loss": 0.6524, "step": 2851 }, { "epoch": 0.5506854605136127, "grad_norm": 1.1686968803405762, "learning_rate": 9.092482188211125e-05, "loss": 0.6323, "step": 2852 }, { "epoch": 0.5508785479822359, "grad_norm": 1.0365736484527588, "learning_rate": 9.091513879540845e-05, "loss": 0.6065, "step": 2853 }, { "epoch": 0.5510716354508592, "grad_norm": 1.238825798034668, "learning_rate": 9.090545106178166e-05, "loss": 0.7055, "step": 2854 }, { "epoch": 0.5512647229194825, "grad_norm": 0.7385430932044983, "learning_rate": 9.089575868233109e-05, "loss": 0.6427, "step": 2855 }, { "epoch": 0.5514578103881058, "grad_norm": 0.5958442687988281, "learning_rate": 9.088606165815757e-05, "loss": 0.7001, "step": 2856 }, { "epoch": 0.551650897856729, "grad_norm": 0.8285045027732849, "learning_rate": 9.087635999036246e-05, "loss": 0.7417, "step": 2857 }, { "epoch": 0.5518439853253524, "grad_norm": 0.612348735332489, "learning_rate": 9.086665368004761e-05, "loss": 0.6282, "step": 2858 }, { "epoch": 0.5520370727939756, "grad_norm": 1.6527631282806396, "learning_rate": 9.08569427283154e-05, "loss": 0.6416, "step": 2859 }, { "epoch": 0.552230160262599, "grad_norm": 0.6871379017829895, "learning_rate": 9.084722713626874e-05, "loss": 0.7116, "step": 2860 }, { "epoch": 0.5524232477312222, "grad_norm": 0.931252658367157, "learning_rate": 9.083750690501111e-05, "loss": 0.7011, "step": 2861 }, { "epoch": 0.5526163351998455, "grad_norm": 0.6297490000724792, "learning_rate": 9.082778203564648e-05, "loss": 0.6854, "step": 2862 }, { "epoch": 0.5528094226684688, "grad_norm": 1.0212819576263428, "learning_rate": 9.08180525292793e-05, "loss": 0.6794, "step": 2863 }, { "epoch": 0.5530025101370921, "grad_norm": 0.5906499624252319, "learning_rate": 9.080831838701464e-05, "loss": 0.6087, "step": 2864 }, { "epoch": 0.5531955976057154, "grad_norm": 0.7378098964691162, "learning_rate": 9.079857960995806e-05, "loss": 0.6776, "step": 2865 }, { "epoch": 0.5533886850743387, "grad_norm": 1.0323609113693237, "learning_rate": 9.07888361992156e-05, "loss": 0.6713, "step": 2866 }, { "epoch": 0.5535817725429619, "grad_norm": 0.7004443407058716, "learning_rate": 9.077908815589389e-05, "loss": 0.652, "step": 2867 }, { "epoch": 0.5537748600115853, "grad_norm": 0.7286086678504944, "learning_rate": 9.076933548110005e-05, "loss": 0.6702, "step": 2868 }, { "epoch": 0.5539679474802085, "grad_norm": 0.8495341539382935, "learning_rate": 9.075957817594174e-05, "loss": 0.6852, "step": 2869 }, { "epoch": 0.5541610349488318, "grad_norm": 0.7567914724349976, "learning_rate": 9.074981624152714e-05, "loss": 0.6475, "step": 2870 }, { "epoch": 0.5543541224174551, "grad_norm": 0.8018556237220764, "learning_rate": 9.074004967896499e-05, "loss": 0.7617, "step": 2871 }, { "epoch": 0.5545472098860784, "grad_norm": 0.8200903534889221, "learning_rate": 9.073027848936447e-05, "loss": 0.682, "step": 2872 }, { "epoch": 0.5547402973547017, "grad_norm": 1.5095988512039185, "learning_rate": 9.072050267383539e-05, "loss": 0.6761, "step": 2873 }, { "epoch": 0.554933384823325, "grad_norm": 0.9742686152458191, "learning_rate": 9.071072223348799e-05, "loss": 0.6505, "step": 2874 }, { "epoch": 0.5551264722919482, "grad_norm": 0.8624967932701111, "learning_rate": 9.07009371694331e-05, "loss": 0.6474, "step": 2875 }, { "epoch": 0.5553195597605716, "grad_norm": 1.2075825929641724, "learning_rate": 9.069114748278208e-05, "loss": 0.6745, "step": 2876 }, { "epoch": 0.5555126472291948, "grad_norm": 0.9195701479911804, "learning_rate": 9.068135317464676e-05, "loss": 0.6551, "step": 2877 }, { "epoch": 0.5557057346978181, "grad_norm": 0.7154941558837891, "learning_rate": 9.067155424613953e-05, "loss": 0.6733, "step": 2878 }, { "epoch": 0.5558988221664414, "grad_norm": 1.6156131029129028, "learning_rate": 9.066175069837331e-05, "loss": 0.6842, "step": 2879 }, { "epoch": 0.5560919096350647, "grad_norm": 2.117314338684082, "learning_rate": 9.065194253246154e-05, "loss": 0.6923, "step": 2880 }, { "epoch": 0.556284997103688, "grad_norm": 0.671265721321106, "learning_rate": 9.064212974951815e-05, "loss": 0.5782, "step": 2881 }, { "epoch": 0.5564780845723113, "grad_norm": 1.6175706386566162, "learning_rate": 9.063231235065765e-05, "loss": 0.6186, "step": 2882 }, { "epoch": 0.5566711720409345, "grad_norm": 0.9873787760734558, "learning_rate": 9.062249033699506e-05, "loss": 0.6656, "step": 2883 }, { "epoch": 0.5568642595095579, "grad_norm": 2.553040027618408, "learning_rate": 9.061266370964587e-05, "loss": 0.6506, "step": 2884 }, { "epoch": 0.5570573469781811, "grad_norm": 0.6359409689903259, "learning_rate": 9.060283246972617e-05, "loss": 0.6286, "step": 2885 }, { "epoch": 0.5572504344468044, "grad_norm": 0.9602515697479248, "learning_rate": 9.059299661835252e-05, "loss": 0.6918, "step": 2886 }, { "epoch": 0.5574435219154277, "grad_norm": 0.8101916313171387, "learning_rate": 9.058315615664203e-05, "loss": 0.6303, "step": 2887 }, { "epoch": 0.557636609384051, "grad_norm": 2.5968000888824463, "learning_rate": 9.057331108571234e-05, "loss": 0.6468, "step": 2888 }, { "epoch": 0.5578296968526743, "grad_norm": 0.7537425756454468, "learning_rate": 9.05634614066816e-05, "loss": 0.6429, "step": 2889 }, { "epoch": 0.5580227843212976, "grad_norm": 0.5420655608177185, "learning_rate": 9.055360712066845e-05, "loss": 0.6021, "step": 2890 }, { "epoch": 0.5582158717899208, "grad_norm": 0.9953858256340027, "learning_rate": 9.054374822879212e-05, "loss": 0.6398, "step": 2891 }, { "epoch": 0.5584089592585442, "grad_norm": 1.0719363689422607, "learning_rate": 9.053388473217234e-05, "loss": 0.6741, "step": 2892 }, { "epoch": 0.5586020467271674, "grad_norm": 1.0892497301101685, "learning_rate": 9.052401663192932e-05, "loss": 0.6453, "step": 2893 }, { "epoch": 0.5587951341957907, "grad_norm": 1.1009929180145264, "learning_rate": 9.051414392918385e-05, "loss": 0.5675, "step": 2894 }, { "epoch": 0.558988221664414, "grad_norm": 0.7379879355430603, "learning_rate": 9.050426662505721e-05, "loss": 0.6484, "step": 2895 }, { "epoch": 0.5591813091330373, "grad_norm": 1.0396287441253662, "learning_rate": 9.049438472067122e-05, "loss": 0.683, "step": 2896 }, { "epoch": 0.5593743966016606, "grad_norm": 0.7214213609695435, "learning_rate": 9.04844982171482e-05, "loss": 0.6876, "step": 2897 }, { "epoch": 0.5595674840702839, "grad_norm": 1.0478016138076782, "learning_rate": 9.047460711561103e-05, "loss": 0.6246, "step": 2898 }, { "epoch": 0.5597605715389071, "grad_norm": 0.9190362691879272, "learning_rate": 9.046471141718305e-05, "loss": 0.6135, "step": 2899 }, { "epoch": 0.5599536590075304, "grad_norm": 0.7104228138923645, "learning_rate": 9.045481112298819e-05, "loss": 0.6306, "step": 2900 }, { "epoch": 0.5601467464761537, "grad_norm": 1.0746058225631714, "learning_rate": 9.044490623415088e-05, "loss": 0.6771, "step": 2901 }, { "epoch": 0.5603398339447769, "grad_norm": 3.147014856338501, "learning_rate": 9.043499675179605e-05, "loss": 0.6584, "step": 2902 }, { "epoch": 0.5605329214134003, "grad_norm": 0.9141179323196411, "learning_rate": 9.042508267704914e-05, "loss": 0.6958, "step": 2903 }, { "epoch": 0.5607260088820235, "grad_norm": 0.769187867641449, "learning_rate": 9.04151640110362e-05, "loss": 0.6274, "step": 2904 }, { "epoch": 0.5609190963506469, "grad_norm": 0.9428219795227051, "learning_rate": 9.04052407548837e-05, "loss": 0.6666, "step": 2905 }, { "epoch": 0.5611121838192701, "grad_norm": 1.4821857213974, "learning_rate": 9.039531290971865e-05, "loss": 0.6114, "step": 2906 }, { "epoch": 0.5613052712878934, "grad_norm": 0.8987532258033752, "learning_rate": 9.038538047666865e-05, "loss": 0.6758, "step": 2907 }, { "epoch": 0.5614983587565167, "grad_norm": 0.6456704139709473, "learning_rate": 9.037544345686175e-05, "loss": 0.7052, "step": 2908 }, { "epoch": 0.56169144622514, "grad_norm": 0.6476702690124512, "learning_rate": 9.036550185142655e-05, "loss": 0.6945, "step": 2909 }, { "epoch": 0.5618845336937632, "grad_norm": 1.0040059089660645, "learning_rate": 9.035555566149216e-05, "loss": 0.7007, "step": 2910 }, { "epoch": 0.5620776211623866, "grad_norm": 1.3788986206054688, "learning_rate": 9.034560488818819e-05, "loss": 0.6856, "step": 2911 }, { "epoch": 0.5622707086310098, "grad_norm": 1.0221117734909058, "learning_rate": 9.033564953264484e-05, "loss": 0.6278, "step": 2912 }, { "epoch": 0.5624637960996332, "grad_norm": 0.8583728671073914, "learning_rate": 9.032568959599276e-05, "loss": 0.6449, "step": 2913 }, { "epoch": 0.5626568835682564, "grad_norm": 0.6303671002388, "learning_rate": 9.031572507936317e-05, "loss": 0.6935, "step": 2914 }, { "epoch": 0.5628499710368797, "grad_norm": 0.7983885407447815, "learning_rate": 9.030575598388775e-05, "loss": 0.6771, "step": 2915 }, { "epoch": 0.563043058505503, "grad_norm": 0.8997114300727844, "learning_rate": 9.029578231069877e-05, "loss": 0.6665, "step": 2916 }, { "epoch": 0.5632361459741263, "grad_norm": 0.6309162378311157, "learning_rate": 9.028580406092898e-05, "loss": 0.6764, "step": 2917 }, { "epoch": 0.5634292334427495, "grad_norm": 2.939418315887451, "learning_rate": 9.027582123571164e-05, "loss": 0.6672, "step": 2918 }, { "epoch": 0.5636223209113729, "grad_norm": 0.976342499256134, "learning_rate": 9.026583383618055e-05, "loss": 0.6189, "step": 2919 }, { "epoch": 0.5638154083799961, "grad_norm": 0.7933691143989563, "learning_rate": 9.025584186347004e-05, "loss": 0.6714, "step": 2920 }, { "epoch": 0.5640084958486195, "grad_norm": 0.6691280007362366, "learning_rate": 9.024584531871496e-05, "loss": 0.585, "step": 2921 }, { "epoch": 0.5642015833172427, "grad_norm": 1.3379027843475342, "learning_rate": 9.023584420305062e-05, "loss": 0.6952, "step": 2922 }, { "epoch": 0.564394670785866, "grad_norm": 0.7635475993156433, "learning_rate": 9.022583851761291e-05, "loss": 0.6558, "step": 2923 }, { "epoch": 0.5645877582544893, "grad_norm": 1.1994267702102661, "learning_rate": 9.021582826353824e-05, "loss": 0.6744, "step": 2924 }, { "epoch": 0.5647808457231126, "grad_norm": 0.647672176361084, "learning_rate": 9.020581344196352e-05, "loss": 0.6226, "step": 2925 }, { "epoch": 0.5649739331917358, "grad_norm": 0.7467062473297119, "learning_rate": 9.019579405402614e-05, "loss": 0.6705, "step": 2926 }, { "epoch": 0.5651670206603592, "grad_norm": 1.1812593936920166, "learning_rate": 9.01857701008641e-05, "loss": 0.6992, "step": 2927 }, { "epoch": 0.5653601081289824, "grad_norm": 1.302390456199646, "learning_rate": 9.017574158361585e-05, "loss": 0.6948, "step": 2928 }, { "epoch": 0.5655531955976058, "grad_norm": 0.8493469953536987, "learning_rate": 9.016570850342036e-05, "loss": 0.6617, "step": 2929 }, { "epoch": 0.565746283066229, "grad_norm": 0.7213636040687561, "learning_rate": 9.015567086141717e-05, "loss": 0.6224, "step": 2930 }, { "epoch": 0.5659393705348523, "grad_norm": 0.7072625160217285, "learning_rate": 9.014562865874625e-05, "loss": 0.6182, "step": 2931 }, { "epoch": 0.5661324580034756, "grad_norm": 0.7882037162780762, "learning_rate": 9.013558189654819e-05, "loss": 0.6791, "step": 2932 }, { "epoch": 0.5663255454720989, "grad_norm": 0.6653622984886169, "learning_rate": 9.012553057596402e-05, "loss": 0.6004, "step": 2933 }, { "epoch": 0.5665186329407221, "grad_norm": 0.556806743144989, "learning_rate": 9.01154746981353e-05, "loss": 0.6323, "step": 2934 }, { "epoch": 0.5667117204093455, "grad_norm": 0.7885903716087341, "learning_rate": 9.010541426420418e-05, "loss": 0.6073, "step": 2935 }, { "epoch": 0.5669048078779687, "grad_norm": 0.8524034023284912, "learning_rate": 9.009534927531322e-05, "loss": 0.645, "step": 2936 }, { "epoch": 0.567097895346592, "grad_norm": 0.8174388408660889, "learning_rate": 9.008527973260556e-05, "loss": 0.6638, "step": 2937 }, { "epoch": 0.5672909828152153, "grad_norm": 1.153361439704895, "learning_rate": 9.007520563722485e-05, "loss": 0.6328, "step": 2938 }, { "epoch": 0.5674840702838386, "grad_norm": 0.7168325185775757, "learning_rate": 9.006512699031524e-05, "loss": 0.6447, "step": 2939 }, { "epoch": 0.5676771577524619, "grad_norm": 1.6163047552108765, "learning_rate": 9.005504379302143e-05, "loss": 0.6392, "step": 2940 }, { "epoch": 0.5678702452210852, "grad_norm": 0.593449592590332, "learning_rate": 9.004495604648862e-05, "loss": 0.6885, "step": 2941 }, { "epoch": 0.5680633326897084, "grad_norm": 0.7049077153205872, "learning_rate": 9.003486375186249e-05, "loss": 0.6672, "step": 2942 }, { "epoch": 0.5682564201583318, "grad_norm": 1.899692177772522, "learning_rate": 9.002476691028929e-05, "loss": 0.6483, "step": 2943 }, { "epoch": 0.568449507626955, "grad_norm": 0.9620999097824097, "learning_rate": 9.001466552291577e-05, "loss": 0.668, "step": 2944 }, { "epoch": 0.5686425950955784, "grad_norm": 1.0031558275222778, "learning_rate": 9.000455959088918e-05, "loss": 0.7109, "step": 2945 }, { "epoch": 0.5688356825642016, "grad_norm": 0.7574772238731384, "learning_rate": 8.999444911535731e-05, "loss": 0.6375, "step": 2946 }, { "epoch": 0.5690287700328248, "grad_norm": 1.013588309288025, "learning_rate": 8.998433409746844e-05, "loss": 0.7128, "step": 2947 }, { "epoch": 0.5692218575014482, "grad_norm": 0.5567821264266968, "learning_rate": 8.997421453837138e-05, "loss": 0.6963, "step": 2948 }, { "epoch": 0.5694149449700714, "grad_norm": 0.5352619886398315, "learning_rate": 8.996409043921548e-05, "loss": 0.6841, "step": 2949 }, { "epoch": 0.5696080324386947, "grad_norm": 0.6018959879875183, "learning_rate": 8.995396180115056e-05, "loss": 0.6233, "step": 2950 }, { "epoch": 0.569801119907318, "grad_norm": 0.5205825567245483, "learning_rate": 8.994382862532698e-05, "loss": 0.6494, "step": 2951 }, { "epoch": 0.5699942073759413, "grad_norm": 0.7062390446662903, "learning_rate": 8.99336909128956e-05, "loss": 0.6821, "step": 2952 }, { "epoch": 0.5701872948445645, "grad_norm": 0.9607230424880981, "learning_rate": 8.992354866500784e-05, "loss": 0.6192, "step": 2953 }, { "epoch": 0.5703803823131879, "grad_norm": 0.714858889579773, "learning_rate": 8.991340188281558e-05, "loss": 0.717, "step": 2954 }, { "epoch": 0.5705734697818111, "grad_norm": 0.6080645322799683, "learning_rate": 8.990325056747124e-05, "loss": 0.706, "step": 2955 }, { "epoch": 0.5707665572504345, "grad_norm": 0.9491029977798462, "learning_rate": 8.989309472012775e-05, "loss": 0.6663, "step": 2956 }, { "epoch": 0.5709596447190577, "grad_norm": 0.9915834069252014, "learning_rate": 8.988293434193857e-05, "loss": 0.7364, "step": 2957 }, { "epoch": 0.571152732187681, "grad_norm": 0.8492850661277771, "learning_rate": 8.987276943405764e-05, "loss": 0.7087, "step": 2958 }, { "epoch": 0.5713458196563043, "grad_norm": 0.7384893298149109, "learning_rate": 8.986259999763945e-05, "loss": 0.6711, "step": 2959 }, { "epoch": 0.5715389071249276, "grad_norm": 0.5079546570777893, "learning_rate": 8.9852426033839e-05, "loss": 0.6815, "step": 2960 }, { "epoch": 0.5717319945935508, "grad_norm": 0.9950817227363586, "learning_rate": 8.984224754381178e-05, "loss": 0.6542, "step": 2961 }, { "epoch": 0.5719250820621742, "grad_norm": 0.6280733942985535, "learning_rate": 8.983206452871381e-05, "loss": 0.6563, "step": 2962 }, { "epoch": 0.5721181695307974, "grad_norm": 0.5906858444213867, "learning_rate": 8.982187698970164e-05, "loss": 0.628, "step": 2963 }, { "epoch": 0.5723112569994208, "grad_norm": 1.1889675855636597, "learning_rate": 8.981168492793228e-05, "loss": 0.6065, "step": 2964 }, { "epoch": 0.572504344468044, "grad_norm": 0.9934077858924866, "learning_rate": 8.980148834456332e-05, "loss": 0.6007, "step": 2965 }, { "epoch": 0.5726974319366673, "grad_norm": 0.6750931739807129, "learning_rate": 8.979128724075282e-05, "loss": 0.688, "step": 2966 }, { "epoch": 0.5728905194052906, "grad_norm": 0.5170718431472778, "learning_rate": 8.97810816176594e-05, "loss": 0.5962, "step": 2967 }, { "epoch": 0.5730836068739139, "grad_norm": 0.6662752628326416, "learning_rate": 8.977087147644212e-05, "loss": 0.7104, "step": 2968 }, { "epoch": 0.5732766943425371, "grad_norm": 0.7077036499977112, "learning_rate": 8.97606568182606e-05, "loss": 0.6861, "step": 2969 }, { "epoch": 0.5734697818111605, "grad_norm": 0.890177309513092, "learning_rate": 8.975043764427497e-05, "loss": 0.6024, "step": 2970 }, { "epoch": 0.5736628692797837, "grad_norm": 0.7859787940979004, "learning_rate": 8.974021395564589e-05, "loss": 0.6253, "step": 2971 }, { "epoch": 0.5738559567484071, "grad_norm": 0.5815303325653076, "learning_rate": 8.972998575353448e-05, "loss": 0.7126, "step": 2972 }, { "epoch": 0.5740490442170303, "grad_norm": 1.1528587341308594, "learning_rate": 8.971975303910243e-05, "loss": 0.6847, "step": 2973 }, { "epoch": 0.5742421316856536, "grad_norm": 0.6657235026359558, "learning_rate": 8.97095158135119e-05, "loss": 0.6115, "step": 2974 }, { "epoch": 0.5744352191542769, "grad_norm": 0.7458823323249817, "learning_rate": 8.96992740779256e-05, "loss": 0.6698, "step": 2975 }, { "epoch": 0.5746283066229002, "grad_norm": 0.855854868888855, "learning_rate": 8.968902783350673e-05, "loss": 0.6378, "step": 2976 }, { "epoch": 0.5748213940915234, "grad_norm": 0.9614738821983337, "learning_rate": 8.967877708141896e-05, "loss": 0.6635, "step": 2977 }, { "epoch": 0.5750144815601468, "grad_norm": 0.8127419948577881, "learning_rate": 8.966852182282656e-05, "loss": 0.5898, "step": 2978 }, { "epoch": 0.57520756902877, "grad_norm": 0.80559241771698, "learning_rate": 8.965826205889427e-05, "loss": 0.6747, "step": 2979 }, { "epoch": 0.5754006564973934, "grad_norm": 0.6562455296516418, "learning_rate": 8.964799779078732e-05, "loss": 0.6596, "step": 2980 }, { "epoch": 0.5755937439660166, "grad_norm": 1.374322533607483, "learning_rate": 8.963772901967148e-05, "loss": 0.6609, "step": 2981 }, { "epoch": 0.5757868314346399, "grad_norm": 0.7920988202095032, "learning_rate": 8.962745574671302e-05, "loss": 0.6729, "step": 2982 }, { "epoch": 0.5759799189032632, "grad_norm": 1.8508769273757935, "learning_rate": 8.961717797307872e-05, "loss": 0.7077, "step": 2983 }, { "epoch": 0.5761730063718865, "grad_norm": 0.6587318778038025, "learning_rate": 8.960689569993587e-05, "loss": 0.6886, "step": 2984 }, { "epoch": 0.5763660938405097, "grad_norm": 0.8036754727363586, "learning_rate": 8.959660892845231e-05, "loss": 0.6082, "step": 2985 }, { "epoch": 0.5765591813091331, "grad_norm": 0.6679559946060181, "learning_rate": 8.958631765979631e-05, "loss": 0.6595, "step": 2986 }, { "epoch": 0.5767522687777563, "grad_norm": 0.7481614947319031, "learning_rate": 8.957602189513673e-05, "loss": 0.7222, "step": 2987 }, { "epoch": 0.5769453562463797, "grad_norm": 0.804009735584259, "learning_rate": 8.95657216356429e-05, "loss": 0.7227, "step": 2988 }, { "epoch": 0.5771384437150029, "grad_norm": 0.8339055180549622, "learning_rate": 8.955541688248466e-05, "loss": 0.7163, "step": 2989 }, { "epoch": 0.5773315311836262, "grad_norm": 0.9424430131912231, "learning_rate": 8.954510763683237e-05, "loss": 0.736, "step": 2990 }, { "epoch": 0.5775246186522495, "grad_norm": 1.04116690158844, "learning_rate": 8.95347938998569e-05, "loss": 0.6799, "step": 2991 }, { "epoch": 0.5777177061208728, "grad_norm": 1.0654911994934082, "learning_rate": 8.952447567272966e-05, "loss": 0.6624, "step": 2992 }, { "epoch": 0.577910793589496, "grad_norm": 0.879804253578186, "learning_rate": 8.951415295662249e-05, "loss": 0.6288, "step": 2993 }, { "epoch": 0.5781038810581193, "grad_norm": 0.6083958148956299, "learning_rate": 8.950382575270783e-05, "loss": 0.5981, "step": 2994 }, { "epoch": 0.5782969685267426, "grad_norm": 0.6645472049713135, "learning_rate": 8.949349406215855e-05, "loss": 0.6405, "step": 2995 }, { "epoch": 0.5784900559953658, "grad_norm": 0.6575183272361755, "learning_rate": 8.948315788614811e-05, "loss": 0.6639, "step": 2996 }, { "epoch": 0.5786831434639892, "grad_norm": 1.1474556922912598, "learning_rate": 8.947281722585041e-05, "loss": 0.72, "step": 2997 }, { "epoch": 0.5788762309326124, "grad_norm": 0.6593042612075806, "learning_rate": 8.946247208243989e-05, "loss": 0.6654, "step": 2998 }, { "epoch": 0.5790693184012358, "grad_norm": 0.8856760859489441, "learning_rate": 8.945212245709152e-05, "loss": 0.6064, "step": 2999 }, { "epoch": 0.579262405869859, "grad_norm": 0.8174981474876404, "learning_rate": 8.944176835098073e-05, "loss": 0.6231, "step": 3000 }, { "epoch": 0.579262405869859, "eval_loss": 0.6985285878181458, "eval_runtime": 50.0478, "eval_samples_per_second": 13.267, "eval_steps_per_second": 0.42, "step": 3000 }, { "epoch": 0.5794554933384823, "grad_norm": 1.478705883026123, "learning_rate": 8.943140976528348e-05, "loss": 0.6098, "step": 3001 }, { "epoch": 0.5796485808071056, "grad_norm": 0.8759810328483582, "learning_rate": 8.942104670117626e-05, "loss": 0.6783, "step": 3002 }, { "epoch": 0.5798416682757289, "grad_norm": 1.039943814277649, "learning_rate": 8.941067915983604e-05, "loss": 0.7145, "step": 3003 }, { "epoch": 0.5800347557443521, "grad_norm": 0.536754310131073, "learning_rate": 8.940030714244034e-05, "loss": 0.6168, "step": 3004 }, { "epoch": 0.5802278432129755, "grad_norm": 0.8292717337608337, "learning_rate": 8.938993065016711e-05, "loss": 0.76, "step": 3005 }, { "epoch": 0.5804209306815987, "grad_norm": 1.493168830871582, "learning_rate": 8.937954968419489e-05, "loss": 0.68, "step": 3006 }, { "epoch": 0.5806140181502221, "grad_norm": 0.8430405259132385, "learning_rate": 8.93691642457027e-05, "loss": 0.6434, "step": 3007 }, { "epoch": 0.5808071056188453, "grad_norm": 0.6043791770935059, "learning_rate": 8.935877433587004e-05, "loss": 0.6083, "step": 3008 }, { "epoch": 0.5810001930874686, "grad_norm": 3.064066171646118, "learning_rate": 8.934837995587694e-05, "loss": 0.6272, "step": 3009 }, { "epoch": 0.5811932805560919, "grad_norm": 1.3802834749221802, "learning_rate": 8.933798110690396e-05, "loss": 0.6915, "step": 3010 }, { "epoch": 0.5813863680247152, "grad_norm": 4.884006500244141, "learning_rate": 8.932757779013214e-05, "loss": 0.6217, "step": 3011 }, { "epoch": 0.5815794554933384, "grad_norm": 0.7368252277374268, "learning_rate": 8.931717000674302e-05, "loss": 0.6321, "step": 3012 }, { "epoch": 0.5817725429619618, "grad_norm": 1.0795683860778809, "learning_rate": 8.930675775791866e-05, "loss": 0.7094, "step": 3013 }, { "epoch": 0.581965630430585, "grad_norm": 0.7929076552391052, "learning_rate": 8.929634104484165e-05, "loss": 0.6691, "step": 3014 }, { "epoch": 0.5821587178992084, "grad_norm": 0.8314948678016663, "learning_rate": 8.928591986869506e-05, "loss": 0.595, "step": 3015 }, { "epoch": 0.5823518053678316, "grad_norm": 0.7031694650650024, "learning_rate": 8.927549423066245e-05, "loss": 0.6384, "step": 3016 }, { "epoch": 0.5825448928364549, "grad_norm": 0.9254958033561707, "learning_rate": 8.926506413192792e-05, "loss": 0.6764, "step": 3017 }, { "epoch": 0.5827379803050782, "grad_norm": 0.5896854996681213, "learning_rate": 8.925462957367608e-05, "loss": 0.6616, "step": 3018 }, { "epoch": 0.5829310677737015, "grad_norm": 1.0432946681976318, "learning_rate": 8.924419055709201e-05, "loss": 0.6353, "step": 3019 }, { "epoch": 0.5831241552423247, "grad_norm": 1.7742209434509277, "learning_rate": 8.923374708336134e-05, "loss": 0.5885, "step": 3020 }, { "epoch": 0.5833172427109481, "grad_norm": 1.47212553024292, "learning_rate": 8.922329915367016e-05, "loss": 0.7164, "step": 3021 }, { "epoch": 0.5835103301795713, "grad_norm": 1.0402333736419678, "learning_rate": 8.921284676920511e-05, "loss": 0.6494, "step": 3022 }, { "epoch": 0.5837034176481947, "grad_norm": 1.2554019689559937, "learning_rate": 8.92023899311533e-05, "loss": 0.6408, "step": 3023 }, { "epoch": 0.5838965051168179, "grad_norm": 1.873224139213562, "learning_rate": 8.919192864070239e-05, "loss": 0.6659, "step": 3024 }, { "epoch": 0.5840895925854412, "grad_norm": 1.1576135158538818, "learning_rate": 8.918146289904048e-05, "loss": 0.6697, "step": 3025 }, { "epoch": 0.5842826800540645, "grad_norm": 0.9146751165390015, "learning_rate": 8.917099270735622e-05, "loss": 0.5938, "step": 3026 }, { "epoch": 0.5844757675226878, "grad_norm": 0.6172195076942444, "learning_rate": 8.91605180668388e-05, "loss": 0.6413, "step": 3027 }, { "epoch": 0.584668854991311, "grad_norm": 0.6212052702903748, "learning_rate": 8.915003897867781e-05, "loss": 0.6808, "step": 3028 }, { "epoch": 0.5848619424599344, "grad_norm": 0.914716362953186, "learning_rate": 8.913955544406347e-05, "loss": 0.5835, "step": 3029 }, { "epoch": 0.5850550299285576, "grad_norm": 0.8736541867256165, "learning_rate": 8.912906746418641e-05, "loss": 0.7409, "step": 3030 }, { "epoch": 0.585248117397181, "grad_norm": 0.6144726872444153, "learning_rate": 8.911857504023782e-05, "loss": 0.6124, "step": 3031 }, { "epoch": 0.5854412048658042, "grad_norm": 0.5736841559410095, "learning_rate": 8.910807817340934e-05, "loss": 0.6943, "step": 3032 }, { "epoch": 0.5856342923344275, "grad_norm": 1.0629078149795532, "learning_rate": 8.909757686489318e-05, "loss": 0.6306, "step": 3033 }, { "epoch": 0.5858273798030508, "grad_norm": 0.7951944470405579, "learning_rate": 8.9087071115882e-05, "loss": 0.724, "step": 3034 }, { "epoch": 0.5860204672716741, "grad_norm": 0.6844384074211121, "learning_rate": 8.9076560927569e-05, "loss": 0.736, "step": 3035 }, { "epoch": 0.5862135547402973, "grad_norm": 1.12155020236969, "learning_rate": 8.906604630114787e-05, "loss": 0.6729, "step": 3036 }, { "epoch": 0.5864066422089207, "grad_norm": 0.9328687191009521, "learning_rate": 8.905552723781282e-05, "loss": 0.6684, "step": 3037 }, { "epoch": 0.5865997296775439, "grad_norm": 2.28961443901062, "learning_rate": 8.904500373875853e-05, "loss": 0.6759, "step": 3038 }, { "epoch": 0.5867928171461673, "grad_norm": 1.7196345329284668, "learning_rate": 8.903447580518021e-05, "loss": 0.6214, "step": 3039 }, { "epoch": 0.5869859046147905, "grad_norm": 0.6773690581321716, "learning_rate": 8.902394343827355e-05, "loss": 0.6438, "step": 3040 }, { "epoch": 0.5871789920834137, "grad_norm": 0.903734564781189, "learning_rate": 8.901340663923479e-05, "loss": 0.6697, "step": 3041 }, { "epoch": 0.5873720795520371, "grad_norm": 0.9230191111564636, "learning_rate": 8.900286540926061e-05, "loss": 0.7153, "step": 3042 }, { "epoch": 0.5875651670206603, "grad_norm": 1.1938543319702148, "learning_rate": 8.899231974954825e-05, "loss": 0.6788, "step": 3043 }, { "epoch": 0.5877582544892836, "grad_norm": 1.0352011919021606, "learning_rate": 8.898176966129544e-05, "loss": 0.5804, "step": 3044 }, { "epoch": 0.5879513419579069, "grad_norm": 1.0203306674957275, "learning_rate": 8.897121514570037e-05, "loss": 0.6781, "step": 3045 }, { "epoch": 0.5881444294265302, "grad_norm": 1.2384525537490845, "learning_rate": 8.896065620396182e-05, "loss": 0.6746, "step": 3046 }, { "epoch": 0.5883375168951535, "grad_norm": 1.1388790607452393, "learning_rate": 8.895009283727893e-05, "loss": 0.6361, "step": 3047 }, { "epoch": 0.5885306043637768, "grad_norm": 1.4158061742782593, "learning_rate": 8.893952504685152e-05, "loss": 0.6762, "step": 3048 }, { "epoch": 0.5887236918324, "grad_norm": 0.9767947196960449, "learning_rate": 8.892895283387978e-05, "loss": 0.6533, "step": 3049 }, { "epoch": 0.5889167793010234, "grad_norm": 0.6043333411216736, "learning_rate": 8.891837619956445e-05, "loss": 0.7037, "step": 3050 }, { "epoch": 0.5891098667696466, "grad_norm": 0.9231788516044617, "learning_rate": 8.890779514510676e-05, "loss": 0.6565, "step": 3051 }, { "epoch": 0.58930295423827, "grad_norm": 0.7886799573898315, "learning_rate": 8.889720967170848e-05, "loss": 0.6655, "step": 3052 }, { "epoch": 0.5894960417068932, "grad_norm": 1.250755786895752, "learning_rate": 8.888661978057182e-05, "loss": 0.6658, "step": 3053 }, { "epoch": 0.5896891291755165, "grad_norm": 0.9533796906471252, "learning_rate": 8.887602547289954e-05, "loss": 0.6571, "step": 3054 }, { "epoch": 0.5898822166441398, "grad_norm": 0.6219698786735535, "learning_rate": 8.886542674989489e-05, "loss": 0.6332, "step": 3055 }, { "epoch": 0.5900753041127631, "grad_norm": 0.6825438737869263, "learning_rate": 8.885482361276158e-05, "loss": 0.6533, "step": 3056 }, { "epoch": 0.5902683915813863, "grad_norm": 3.6290340423583984, "learning_rate": 8.884421606270391e-05, "loss": 0.6556, "step": 3057 }, { "epoch": 0.5904614790500097, "grad_norm": 1.8193403482437134, "learning_rate": 8.88336041009266e-05, "loss": 0.6768, "step": 3058 }, { "epoch": 0.5906545665186329, "grad_norm": 0.9467763900756836, "learning_rate": 8.882298772863491e-05, "loss": 0.6741, "step": 3059 }, { "epoch": 0.5908476539872562, "grad_norm": 0.5055274367332458, "learning_rate": 8.881236694703457e-05, "loss": 0.6201, "step": 3060 }, { "epoch": 0.5910407414558795, "grad_norm": 0.7306054830551147, "learning_rate": 8.880174175733187e-05, "loss": 0.6381, "step": 3061 }, { "epoch": 0.5912338289245028, "grad_norm": 0.9443287253379822, "learning_rate": 8.879111216073352e-05, "loss": 0.678, "step": 3062 }, { "epoch": 0.5914269163931261, "grad_norm": 0.9171637296676636, "learning_rate": 8.878047815844681e-05, "loss": 0.6609, "step": 3063 }, { "epoch": 0.5916200038617494, "grad_norm": 0.7843788266181946, "learning_rate": 8.876983975167948e-05, "loss": 0.7311, "step": 3064 }, { "epoch": 0.5918130913303726, "grad_norm": 0.7517201900482178, "learning_rate": 8.875919694163976e-05, "loss": 0.6375, "step": 3065 }, { "epoch": 0.592006178798996, "grad_norm": 0.8762763738632202, "learning_rate": 8.874854972953644e-05, "loss": 0.6792, "step": 3066 }, { "epoch": 0.5921992662676192, "grad_norm": 0.6982625722885132, "learning_rate": 8.873789811657876e-05, "loss": 0.6831, "step": 3067 }, { "epoch": 0.5923923537362425, "grad_norm": 0.5194947719573975, "learning_rate": 8.872724210397645e-05, "loss": 0.6763, "step": 3068 }, { "epoch": 0.5925854412048658, "grad_norm": 0.9613003134727478, "learning_rate": 8.871658169293981e-05, "loss": 0.6738, "step": 3069 }, { "epoch": 0.5927785286734891, "grad_norm": 0.6347588896751404, "learning_rate": 8.870591688467957e-05, "loss": 0.6601, "step": 3070 }, { "epoch": 0.5929716161421124, "grad_norm": 0.5811511874198914, "learning_rate": 8.869524768040698e-05, "loss": 0.656, "step": 3071 }, { "epoch": 0.5931647036107357, "grad_norm": 1.402997374534607, "learning_rate": 8.868457408133377e-05, "loss": 0.6164, "step": 3072 }, { "epoch": 0.5933577910793589, "grad_norm": 5.094156742095947, "learning_rate": 8.867389608867224e-05, "loss": 0.6476, "step": 3073 }, { "epoch": 0.5935508785479823, "grad_norm": 0.8433781266212463, "learning_rate": 8.86632137036351e-05, "loss": 0.6479, "step": 3074 }, { "epoch": 0.5937439660166055, "grad_norm": 0.7329630851745605, "learning_rate": 8.865252692743561e-05, "loss": 0.6812, "step": 3075 }, { "epoch": 0.5939370534852288, "grad_norm": 0.5961114764213562, "learning_rate": 8.864183576128752e-05, "loss": 0.6338, "step": 3076 }, { "epoch": 0.5941301409538521, "grad_norm": 0.6696957349777222, "learning_rate": 8.863114020640508e-05, "loss": 0.6358, "step": 3077 }, { "epoch": 0.5943232284224754, "grad_norm": 2.7264764308929443, "learning_rate": 8.862044026400302e-05, "loss": 0.6928, "step": 3078 }, { "epoch": 0.5945163158910987, "grad_norm": 1.475777268409729, "learning_rate": 8.860973593529658e-05, "loss": 0.6174, "step": 3079 }, { "epoch": 0.594709403359722, "grad_norm": 0.8051822185516357, "learning_rate": 8.859902722150153e-05, "loss": 0.672, "step": 3080 }, { "epoch": 0.5949024908283452, "grad_norm": 0.6147481799125671, "learning_rate": 8.858831412383408e-05, "loss": 0.7036, "step": 3081 }, { "epoch": 0.5950955782969686, "grad_norm": 2.3676254749298096, "learning_rate": 8.8577596643511e-05, "loss": 0.637, "step": 3082 }, { "epoch": 0.5952886657655918, "grad_norm": 0.5731756091117859, "learning_rate": 8.856687478174947e-05, "loss": 0.703, "step": 3083 }, { "epoch": 0.5954817532342151, "grad_norm": 0.8164805173873901, "learning_rate": 8.855614853976728e-05, "loss": 0.6723, "step": 3084 }, { "epoch": 0.5956748407028384, "grad_norm": 1.1591761112213135, "learning_rate": 8.854541791878262e-05, "loss": 0.6031, "step": 3085 }, { "epoch": 0.5958679281714617, "grad_norm": 0.6015987992286682, "learning_rate": 8.853468292001423e-05, "loss": 0.6549, "step": 3086 }, { "epoch": 0.596061015640085, "grad_norm": 1.4611483812332153, "learning_rate": 8.852394354468134e-05, "loss": 0.722, "step": 3087 }, { "epoch": 0.5962541031087083, "grad_norm": 0.9333136677742004, "learning_rate": 8.851319979400366e-05, "loss": 0.7132, "step": 3088 }, { "epoch": 0.5964471905773315, "grad_norm": 0.7077903747558594, "learning_rate": 8.850245166920142e-05, "loss": 0.6108, "step": 3089 }, { "epoch": 0.5966402780459548, "grad_norm": 1.6384425163269043, "learning_rate": 8.849169917149531e-05, "loss": 0.6868, "step": 3090 }, { "epoch": 0.5968333655145781, "grad_norm": 0.8226354718208313, "learning_rate": 8.848094230210659e-05, "loss": 0.6907, "step": 3091 }, { "epoch": 0.5970264529832013, "grad_norm": 1.3045059442520142, "learning_rate": 8.84701810622569e-05, "loss": 0.7078, "step": 3092 }, { "epoch": 0.5972195404518247, "grad_norm": 12.501705169677734, "learning_rate": 8.845941545316852e-05, "loss": 0.6827, "step": 3093 }, { "epoch": 0.5974126279204479, "grad_norm": 0.6993937492370605, "learning_rate": 8.844864547606411e-05, "loss": 0.6416, "step": 3094 }, { "epoch": 0.5976057153890713, "grad_norm": 1.7370203733444214, "learning_rate": 8.843787113216684e-05, "loss": 0.6536, "step": 3095 }, { "epoch": 0.5977988028576945, "grad_norm": 0.7986916899681091, "learning_rate": 8.842709242270046e-05, "loss": 0.6677, "step": 3096 }, { "epoch": 0.5979918903263178, "grad_norm": 0.718407392501831, "learning_rate": 8.841630934888912e-05, "loss": 0.6989, "step": 3097 }, { "epoch": 0.5981849777949411, "grad_norm": 1.0064905881881714, "learning_rate": 8.840552191195752e-05, "loss": 0.7016, "step": 3098 }, { "epoch": 0.5983780652635644, "grad_norm": 1.0153495073318481, "learning_rate": 8.839473011313084e-05, "loss": 0.6486, "step": 3099 }, { "epoch": 0.5985711527321876, "grad_norm": 1.013521671295166, "learning_rate": 8.838393395363474e-05, "loss": 0.6515, "step": 3100 }, { "epoch": 0.598764240200811, "grad_norm": 0.8587341904640198, "learning_rate": 8.83731334346954e-05, "loss": 0.6581, "step": 3101 }, { "epoch": 0.5989573276694342, "grad_norm": 1.696001410484314, "learning_rate": 8.83623285575395e-05, "loss": 0.7612, "step": 3102 }, { "epoch": 0.5991504151380576, "grad_norm": 0.48171329498291016, "learning_rate": 8.835151932339417e-05, "loss": 0.6679, "step": 3103 }, { "epoch": 0.5993435026066808, "grad_norm": 0.9711824059486389, "learning_rate": 8.83407057334871e-05, "loss": 0.6716, "step": 3104 }, { "epoch": 0.5995365900753041, "grad_norm": 0.748436689376831, "learning_rate": 8.832988778904641e-05, "loss": 0.7525, "step": 3105 }, { "epoch": 0.5997296775439274, "grad_norm": 1.0561577081680298, "learning_rate": 8.831906549130076e-05, "loss": 0.7051, "step": 3106 }, { "epoch": 0.5999227650125507, "grad_norm": 0.6740824580192566, "learning_rate": 8.830823884147929e-05, "loss": 0.619, "step": 3107 }, { "epoch": 0.6001158524811739, "grad_norm": 0.614112377166748, "learning_rate": 8.829740784081162e-05, "loss": 0.7138, "step": 3108 }, { "epoch": 0.6003089399497973, "grad_norm": 0.6510611176490784, "learning_rate": 8.82865724905279e-05, "loss": 0.6321, "step": 3109 }, { "epoch": 0.6005020274184205, "grad_norm": 1.1230034828186035, "learning_rate": 8.827573279185872e-05, "loss": 0.6479, "step": 3110 }, { "epoch": 0.6006951148870439, "grad_norm": 0.7880390286445618, "learning_rate": 8.826488874603523e-05, "loss": 0.6466, "step": 3111 }, { "epoch": 0.6008882023556671, "grad_norm": 1.0641086101531982, "learning_rate": 8.825404035428901e-05, "loss": 0.6338, "step": 3112 }, { "epoch": 0.6010812898242904, "grad_norm": 0.7803242206573486, "learning_rate": 8.824318761785219e-05, "loss": 0.618, "step": 3113 }, { "epoch": 0.6012743772929137, "grad_norm": 0.6471132636070251, "learning_rate": 8.823233053795735e-05, "loss": 0.6335, "step": 3114 }, { "epoch": 0.601467464761537, "grad_norm": 0.6012256741523743, "learning_rate": 8.822146911583757e-05, "loss": 0.6112, "step": 3115 }, { "epoch": 0.6016605522301602, "grad_norm": 2.6783010959625244, "learning_rate": 8.821060335272645e-05, "loss": 0.6469, "step": 3116 }, { "epoch": 0.6018536396987836, "grad_norm": 0.7624860405921936, "learning_rate": 8.819973324985806e-05, "loss": 0.618, "step": 3117 }, { "epoch": 0.6020467271674068, "grad_norm": 1.2006127834320068, "learning_rate": 8.818885880846697e-05, "loss": 0.686, "step": 3118 }, { "epoch": 0.6022398146360302, "grad_norm": 1.0212172269821167, "learning_rate": 8.817798002978823e-05, "loss": 0.6772, "step": 3119 }, { "epoch": 0.6024329021046534, "grad_norm": 0.734103262424469, "learning_rate": 8.816709691505743e-05, "loss": 0.6329, "step": 3120 }, { "epoch": 0.6026259895732767, "grad_norm": 0.9651567935943604, "learning_rate": 8.815620946551056e-05, "loss": 0.6786, "step": 3121 }, { "epoch": 0.6028190770419, "grad_norm": 0.8605098128318787, "learning_rate": 8.814531768238419e-05, "loss": 0.6177, "step": 3122 }, { "epoch": 0.6030121645105233, "grad_norm": 0.6123762130737305, "learning_rate": 8.813442156691536e-05, "loss": 0.6717, "step": 3123 }, { "epoch": 0.6032052519791465, "grad_norm": 0.6388707160949707, "learning_rate": 8.812352112034159e-05, "loss": 0.6724, "step": 3124 }, { "epoch": 0.6033983394477699, "grad_norm": 0.5547511577606201, "learning_rate": 8.811261634390087e-05, "loss": 0.5948, "step": 3125 }, { "epoch": 0.6035914269163931, "grad_norm": 1.2888849973678589, "learning_rate": 8.810170723883172e-05, "loss": 0.6854, "step": 3126 }, { "epoch": 0.6037845143850165, "grad_norm": 0.5548261404037476, "learning_rate": 8.809079380637315e-05, "loss": 0.7185, "step": 3127 }, { "epoch": 0.6039776018536397, "grad_norm": 0.8055516481399536, "learning_rate": 8.807987604776464e-05, "loss": 0.6652, "step": 3128 }, { "epoch": 0.604170689322263, "grad_norm": 0.7615648508071899, "learning_rate": 8.806895396424616e-05, "loss": 0.6463, "step": 3129 }, { "epoch": 0.6043637767908863, "grad_norm": 1.5669174194335938, "learning_rate": 8.805802755705821e-05, "loss": 0.6165, "step": 3130 }, { "epoch": 0.6045568642595096, "grad_norm": 1.1483861207962036, "learning_rate": 8.804709682744174e-05, "loss": 0.6889, "step": 3131 }, { "epoch": 0.6047499517281328, "grad_norm": 0.9872117638587952, "learning_rate": 8.803616177663821e-05, "loss": 0.6514, "step": 3132 }, { "epoch": 0.6049430391967562, "grad_norm": 0.6302210688591003, "learning_rate": 8.802522240588953e-05, "loss": 0.609, "step": 3133 }, { "epoch": 0.6051361266653794, "grad_norm": 1.3989861011505127, "learning_rate": 8.801427871643818e-05, "loss": 0.6433, "step": 3134 }, { "epoch": 0.6053292141340028, "grad_norm": 1.677590250968933, "learning_rate": 8.800333070952707e-05, "loss": 0.7061, "step": 3135 }, { "epoch": 0.605522301602626, "grad_norm": 0.5891548991203308, "learning_rate": 8.79923783863996e-05, "loss": 0.6182, "step": 3136 }, { "epoch": 0.6057153890712492, "grad_norm": 0.9890543222427368, "learning_rate": 8.79814217482997e-05, "loss": 0.663, "step": 3137 }, { "epoch": 0.6059084765398726, "grad_norm": 0.9663726091384888, "learning_rate": 8.797046079647175e-05, "loss": 0.5995, "step": 3138 }, { "epoch": 0.6061015640084958, "grad_norm": 1.1885473728179932, "learning_rate": 8.795949553216065e-05, "loss": 0.6066, "step": 3139 }, { "epoch": 0.6062946514771191, "grad_norm": 0.6905332803726196, "learning_rate": 8.794852595661178e-05, "loss": 0.6581, "step": 3140 }, { "epoch": 0.6064877389457424, "grad_norm": 0.616190493106842, "learning_rate": 8.7937552071071e-05, "loss": 0.6776, "step": 3141 }, { "epoch": 0.6066808264143657, "grad_norm": 0.6224045157432556, "learning_rate": 8.792657387678465e-05, "loss": 0.6608, "step": 3142 }, { "epoch": 0.606873913882989, "grad_norm": 0.9177461266517639, "learning_rate": 8.791559137499959e-05, "loss": 0.6805, "step": 3143 }, { "epoch": 0.6070670013516123, "grad_norm": 0.71163010597229, "learning_rate": 8.790460456696316e-05, "loss": 0.6258, "step": 3144 }, { "epoch": 0.6072600888202355, "grad_norm": 0.6522877216339111, "learning_rate": 8.789361345392316e-05, "loss": 0.6607, "step": 3145 }, { "epoch": 0.6074531762888589, "grad_norm": 0.676386833190918, "learning_rate": 8.788261803712793e-05, "loss": 0.6643, "step": 3146 }, { "epoch": 0.6076462637574821, "grad_norm": 0.9132712483406067, "learning_rate": 8.787161831782626e-05, "loss": 0.6272, "step": 3147 }, { "epoch": 0.6078393512261054, "grad_norm": 1.8406171798706055, "learning_rate": 8.786061429726743e-05, "loss": 0.6536, "step": 3148 }, { "epoch": 0.6080324386947287, "grad_norm": 0.9073221683502197, "learning_rate": 8.784960597670124e-05, "loss": 0.6436, "step": 3149 }, { "epoch": 0.608225526163352, "grad_norm": 0.991352379322052, "learning_rate": 8.783859335737792e-05, "loss": 0.7145, "step": 3150 }, { "epoch": 0.6084186136319752, "grad_norm": 8.27409839630127, "learning_rate": 8.782757644054826e-05, "loss": 0.6479, "step": 3151 }, { "epoch": 0.6086117011005986, "grad_norm": 0.7699120044708252, "learning_rate": 8.781655522746351e-05, "loss": 0.6581, "step": 3152 }, { "epoch": 0.6088047885692218, "grad_norm": 0.8316440582275391, "learning_rate": 8.780552971937534e-05, "loss": 0.5818, "step": 3153 }, { "epoch": 0.6089978760378452, "grad_norm": 0.8204681873321533, "learning_rate": 8.779449991753604e-05, "loss": 0.6421, "step": 3154 }, { "epoch": 0.6091909635064684, "grad_norm": 1.0699617862701416, "learning_rate": 8.778346582319828e-05, "loss": 0.7334, "step": 3155 }, { "epoch": 0.6093840509750917, "grad_norm": 0.7534817457199097, "learning_rate": 8.777242743761526e-05, "loss": 0.7916, "step": 3156 }, { "epoch": 0.609577138443715, "grad_norm": 1.1365480422973633, "learning_rate": 8.776138476204066e-05, "loss": 0.6714, "step": 3157 }, { "epoch": 0.6097702259123383, "grad_norm": 0.8633044958114624, "learning_rate": 8.775033779772865e-05, "loss": 0.6235, "step": 3158 }, { "epoch": 0.6099633133809615, "grad_norm": 2.5902562141418457, "learning_rate": 8.773928654593388e-05, "loss": 0.6958, "step": 3159 }, { "epoch": 0.6101564008495849, "grad_norm": 3.8640880584716797, "learning_rate": 8.772823100791151e-05, "loss": 0.6887, "step": 3160 }, { "epoch": 0.6103494883182081, "grad_norm": 0.8963883519172668, "learning_rate": 8.771717118491715e-05, "loss": 0.6782, "step": 3161 }, { "epoch": 0.6105425757868315, "grad_norm": 1.1291950941085815, "learning_rate": 8.770610707820693e-05, "loss": 0.6328, "step": 3162 }, { "epoch": 0.6107356632554547, "grad_norm": 1.2753902673721313, "learning_rate": 8.769503868903744e-05, "loss": 0.6112, "step": 3163 }, { "epoch": 0.610928750724078, "grad_norm": 1.8194199800491333, "learning_rate": 8.768396601866577e-05, "loss": 0.628, "step": 3164 }, { "epoch": 0.6111218381927013, "grad_norm": 1.4367389678955078, "learning_rate": 8.76728890683495e-05, "loss": 0.6428, "step": 3165 }, { "epoch": 0.6113149256613246, "grad_norm": 0.8622623085975647, "learning_rate": 8.76618078393467e-05, "loss": 0.5996, "step": 3166 }, { "epoch": 0.6115080131299478, "grad_norm": 0.826208770275116, "learning_rate": 8.76507223329159e-05, "loss": 0.6326, "step": 3167 }, { "epoch": 0.6117011005985712, "grad_norm": 3.0922727584838867, "learning_rate": 8.763963255031613e-05, "loss": 0.6976, "step": 3168 }, { "epoch": 0.6118941880671944, "grad_norm": 1.2050702571868896, "learning_rate": 8.762853849280693e-05, "loss": 0.7383, "step": 3169 }, { "epoch": 0.6120872755358178, "grad_norm": 0.6499557495117188, "learning_rate": 8.76174401616483e-05, "loss": 0.6136, "step": 3170 }, { "epoch": 0.612280363004441, "grad_norm": 1.0853625535964966, "learning_rate": 8.760633755810071e-05, "loss": 0.6182, "step": 3171 }, { "epoch": 0.6124734504730643, "grad_norm": 0.9963617324829102, "learning_rate": 8.759523068342514e-05, "loss": 0.6521, "step": 3172 }, { "epoch": 0.6126665379416876, "grad_norm": 0.5772719979286194, "learning_rate": 8.758411953888309e-05, "loss": 0.6287, "step": 3173 }, { "epoch": 0.6128596254103109, "grad_norm": 2.5710794925689697, "learning_rate": 8.757300412573644e-05, "loss": 0.6925, "step": 3174 }, { "epoch": 0.6130527128789341, "grad_norm": 1.1073518991470337, "learning_rate": 8.756188444524767e-05, "loss": 0.6164, "step": 3175 }, { "epoch": 0.6132458003475575, "grad_norm": 1.7362314462661743, "learning_rate": 8.755076049867966e-05, "loss": 0.711, "step": 3176 }, { "epoch": 0.6134388878161807, "grad_norm": 0.6405041217803955, "learning_rate": 8.753963228729582e-05, "loss": 0.6413, "step": 3177 }, { "epoch": 0.6136319752848041, "grad_norm": 0.7103281617164612, "learning_rate": 8.752849981236006e-05, "loss": 0.641, "step": 3178 }, { "epoch": 0.6138250627534273, "grad_norm": 1.138574242591858, "learning_rate": 8.751736307513671e-05, "loss": 0.6359, "step": 3179 }, { "epoch": 0.6140181502220506, "grad_norm": 0.890854001045227, "learning_rate": 8.750622207689065e-05, "loss": 0.6236, "step": 3180 }, { "epoch": 0.6142112376906739, "grad_norm": 0.6904204487800598, "learning_rate": 8.749507681888718e-05, "loss": 0.6599, "step": 3181 }, { "epoch": 0.6144043251592972, "grad_norm": 0.7476391792297363, "learning_rate": 8.748392730239216e-05, "loss": 0.652, "step": 3182 }, { "epoch": 0.6145974126279204, "grad_norm": 1.2412817478179932, "learning_rate": 8.747277352867185e-05, "loss": 0.674, "step": 3183 }, { "epoch": 0.6147905000965437, "grad_norm": 0.7875796556472778, "learning_rate": 8.746161549899309e-05, "loss": 0.7295, "step": 3184 }, { "epoch": 0.614983587565167, "grad_norm": 3.8006372451782227, "learning_rate": 8.745045321462311e-05, "loss": 0.6717, "step": 3185 }, { "epoch": 0.6151766750337903, "grad_norm": 0.5309248566627502, "learning_rate": 8.743928667682966e-05, "loss": 0.5615, "step": 3186 }, { "epoch": 0.6153697625024136, "grad_norm": 0.8652348518371582, "learning_rate": 8.7428115886881e-05, "loss": 0.5778, "step": 3187 }, { "epoch": 0.6155628499710368, "grad_norm": 0.947371244430542, "learning_rate": 8.741694084604584e-05, "loss": 0.6268, "step": 3188 }, { "epoch": 0.6157559374396602, "grad_norm": 0.681553065776825, "learning_rate": 8.740576155559338e-05, "loss": 0.5779, "step": 3189 }, { "epoch": 0.6159490249082834, "grad_norm": 0.5921410322189331, "learning_rate": 8.739457801679328e-05, "loss": 0.6014, "step": 3190 }, { "epoch": 0.6161421123769067, "grad_norm": 4.605844974517822, "learning_rate": 8.738339023091575e-05, "loss": 0.6672, "step": 3191 }, { "epoch": 0.61633519984553, "grad_norm": 0.6563619375228882, "learning_rate": 8.737219819923142e-05, "loss": 0.6064, "step": 3192 }, { "epoch": 0.6165282873141533, "grad_norm": 0.8768596053123474, "learning_rate": 8.736100192301143e-05, "loss": 0.6609, "step": 3193 }, { "epoch": 0.6167213747827766, "grad_norm": 0.9783595204353333, "learning_rate": 8.734980140352735e-05, "loss": 0.5669, "step": 3194 }, { "epoch": 0.6169144622513999, "grad_norm": 3.053551435470581, "learning_rate": 8.733859664205134e-05, "loss": 0.6371, "step": 3195 }, { "epoch": 0.6171075497200231, "grad_norm": 0.7288690805435181, "learning_rate": 8.732738763985594e-05, "loss": 0.6646, "step": 3196 }, { "epoch": 0.6173006371886465, "grad_norm": 1.1514643430709839, "learning_rate": 8.731617439821423e-05, "loss": 0.6415, "step": 3197 }, { "epoch": 0.6174937246572697, "grad_norm": 2.6381988525390625, "learning_rate": 8.73049569183997e-05, "loss": 0.6111, "step": 3198 }, { "epoch": 0.617686812125893, "grad_norm": 1.3341729640960693, "learning_rate": 8.729373520168644e-05, "loss": 0.6587, "step": 3199 }, { "epoch": 0.6178798995945163, "grad_norm": 0.825278103351593, "learning_rate": 8.728250924934893e-05, "loss": 0.6604, "step": 3200 }, { "epoch": 0.6180729870631396, "grad_norm": 1.036177635192871, "learning_rate": 8.727127906266212e-05, "loss": 0.6578, "step": 3201 }, { "epoch": 0.6182660745317629, "grad_norm": 0.8016918897628784, "learning_rate": 8.726004464290152e-05, "loss": 0.6462, "step": 3202 }, { "epoch": 0.6184591620003862, "grad_norm": 0.9443320631980896, "learning_rate": 8.724880599134305e-05, "loss": 0.636, "step": 3203 }, { "epoch": 0.6186522494690094, "grad_norm": 0.944796621799469, "learning_rate": 8.723756310926314e-05, "loss": 0.6421, "step": 3204 }, { "epoch": 0.6188453369376328, "grad_norm": 1.6554416418075562, "learning_rate": 8.72263159979387e-05, "loss": 0.6893, "step": 3205 }, { "epoch": 0.619038424406256, "grad_norm": 1.0901875495910645, "learning_rate": 8.72150646586471e-05, "loss": 0.712, "step": 3206 }, { "epoch": 0.6192315118748793, "grad_norm": 1.9079216718673706, "learning_rate": 8.720380909266625e-05, "loss": 0.6598, "step": 3207 }, { "epoch": 0.6194245993435026, "grad_norm": 0.8486428260803223, "learning_rate": 8.719254930127446e-05, "loss": 0.6328, "step": 3208 }, { "epoch": 0.6196176868121259, "grad_norm": 0.6840015649795532, "learning_rate": 8.718128528575057e-05, "loss": 0.5826, "step": 3209 }, { "epoch": 0.6198107742807492, "grad_norm": 0.5644471049308777, "learning_rate": 8.717001704737388e-05, "loss": 0.5648, "step": 3210 }, { "epoch": 0.6200038617493725, "grad_norm": 0.6957864165306091, "learning_rate": 8.71587445874242e-05, "loss": 0.7203, "step": 3211 }, { "epoch": 0.6201969492179957, "grad_norm": 0.930766761302948, "learning_rate": 8.714746790718176e-05, "loss": 0.6873, "step": 3212 }, { "epoch": 0.6203900366866191, "grad_norm": 0.9926846027374268, "learning_rate": 8.713618700792732e-05, "loss": 0.6363, "step": 3213 }, { "epoch": 0.6205831241552423, "grad_norm": 0.8385704159736633, "learning_rate": 8.712490189094211e-05, "loss": 0.6714, "step": 3214 }, { "epoch": 0.6207762116238656, "grad_norm": 0.8744637966156006, "learning_rate": 8.711361255750785e-05, "loss": 0.6054, "step": 3215 }, { "epoch": 0.6209692990924889, "grad_norm": 0.8918502330780029, "learning_rate": 8.710231900890669e-05, "loss": 0.6614, "step": 3216 }, { "epoch": 0.6211623865611122, "grad_norm": 1.6765198707580566, "learning_rate": 8.709102124642131e-05, "loss": 0.7208, "step": 3217 }, { "epoch": 0.6213554740297355, "grad_norm": 1.0012673139572144, "learning_rate": 8.707971927133485e-05, "loss": 0.7549, "step": 3218 }, { "epoch": 0.6215485614983588, "grad_norm": 1.1738120317459106, "learning_rate": 8.706841308493093e-05, "loss": 0.6989, "step": 3219 }, { "epoch": 0.621741648966982, "grad_norm": 1.2301826477050781, "learning_rate": 8.70571026884936e-05, "loss": 0.56, "step": 3220 }, { "epoch": 0.6219347364356054, "grad_norm": 1.326960563659668, "learning_rate": 8.704578808330752e-05, "loss": 0.6571, "step": 3221 }, { "epoch": 0.6221278239042286, "grad_norm": 1.169061541557312, "learning_rate": 8.703446927065769e-05, "loss": 0.6791, "step": 3222 }, { "epoch": 0.622320911372852, "grad_norm": 0.9178202152252197, "learning_rate": 8.702314625182964e-05, "loss": 0.6724, "step": 3223 }, { "epoch": 0.6225139988414752, "grad_norm": 2.1771788597106934, "learning_rate": 8.701181902810938e-05, "loss": 0.6623, "step": 3224 }, { "epoch": 0.6227070863100985, "grad_norm": 1.8459981679916382, "learning_rate": 8.70004876007834e-05, "loss": 0.6687, "step": 3225 }, { "epoch": 0.6229001737787218, "grad_norm": 3.6869895458221436, "learning_rate": 8.698915197113867e-05, "loss": 0.6206, "step": 3226 }, { "epoch": 0.6230932612473451, "grad_norm": 1.5466982126235962, "learning_rate": 8.697781214046263e-05, "loss": 0.6789, "step": 3227 }, { "epoch": 0.6232863487159683, "grad_norm": 1.1023526191711426, "learning_rate": 8.69664681100432e-05, "loss": 0.6318, "step": 3228 }, { "epoch": 0.6234794361845917, "grad_norm": 1.0695194005966187, "learning_rate": 8.695511988116875e-05, "loss": 0.634, "step": 3229 }, { "epoch": 0.6236725236532149, "grad_norm": 0.9167725443840027, "learning_rate": 8.69437674551282e-05, "loss": 0.6149, "step": 3230 }, { "epoch": 0.6238656111218381, "grad_norm": 1.035781741142273, "learning_rate": 8.693241083321085e-05, "loss": 0.6836, "step": 3231 }, { "epoch": 0.6240586985904615, "grad_norm": 0.9745731353759766, "learning_rate": 8.692105001670655e-05, "loss": 0.6485, "step": 3232 }, { "epoch": 0.6242517860590847, "grad_norm": 1.013683557510376, "learning_rate": 8.690968500690557e-05, "loss": 0.656, "step": 3233 }, { "epoch": 0.624444873527708, "grad_norm": 1.3041861057281494, "learning_rate": 8.689831580509874e-05, "loss": 0.5951, "step": 3234 }, { "epoch": 0.6246379609963313, "grad_norm": 0.865645706653595, "learning_rate": 8.688694241257728e-05, "loss": 0.6298, "step": 3235 }, { "epoch": 0.6248310484649546, "grad_norm": 1.8686619997024536, "learning_rate": 8.687556483063293e-05, "loss": 0.6807, "step": 3236 }, { "epoch": 0.6250241359335779, "grad_norm": 0.9728381633758545, "learning_rate": 8.686418306055788e-05, "loss": 0.6297, "step": 3237 }, { "epoch": 0.6252172234022012, "grad_norm": 0.8367204070091248, "learning_rate": 8.685279710364481e-05, "loss": 0.6707, "step": 3238 }, { "epoch": 0.6254103108708244, "grad_norm": 0.8484258055686951, "learning_rate": 8.68414069611869e-05, "loss": 0.6669, "step": 3239 }, { "epoch": 0.6256033983394478, "grad_norm": 0.9057306051254272, "learning_rate": 8.683001263447778e-05, "loss": 0.6881, "step": 3240 }, { "epoch": 0.625796485808071, "grad_norm": 1.1278479099273682, "learning_rate": 8.681861412481153e-05, "loss": 0.7, "step": 3241 }, { "epoch": 0.6259895732766944, "grad_norm": 1.802075982093811, "learning_rate": 8.680721143348277e-05, "loss": 0.6158, "step": 3242 }, { "epoch": 0.6261826607453176, "grad_norm": 1.102781057357788, "learning_rate": 8.679580456178651e-05, "loss": 0.6723, "step": 3243 }, { "epoch": 0.6263757482139409, "grad_norm": 1.0443049669265747, "learning_rate": 8.678439351101831e-05, "loss": 0.6653, "step": 3244 }, { "epoch": 0.6265688356825642, "grad_norm": 1.0171781778335571, "learning_rate": 8.677297828247418e-05, "loss": 0.6205, "step": 3245 }, { "epoch": 0.6267619231511875, "grad_norm": 0.9503247141838074, "learning_rate": 8.676155887745062e-05, "loss": 0.6985, "step": 3246 }, { "epoch": 0.6269550106198107, "grad_norm": 2.491938829421997, "learning_rate": 8.675013529724451e-05, "loss": 0.6333, "step": 3247 }, { "epoch": 0.6271480980884341, "grad_norm": 0.9557819366455078, "learning_rate": 8.673870754315336e-05, "loss": 0.6637, "step": 3248 }, { "epoch": 0.6273411855570573, "grad_norm": 2.0906338691711426, "learning_rate": 8.672727561647505e-05, "loss": 0.6202, "step": 3249 }, { "epoch": 0.6275342730256807, "grad_norm": 1.1093158721923828, "learning_rate": 8.671583951850795e-05, "loss": 0.6449, "step": 3250 }, { "epoch": 0.6277273604943039, "grad_norm": 1.3571226596832275, "learning_rate": 8.670439925055089e-05, "loss": 0.6373, "step": 3251 }, { "epoch": 0.6279204479629272, "grad_norm": 1.4403594732284546, "learning_rate": 8.669295481390324e-05, "loss": 0.633, "step": 3252 }, { "epoch": 0.6281135354315505, "grad_norm": 1.2166199684143066, "learning_rate": 8.668150620986478e-05, "loss": 0.6543, "step": 3253 }, { "epoch": 0.6283066229001738, "grad_norm": 1.4705307483673096, "learning_rate": 8.667005343973576e-05, "loss": 0.6701, "step": 3254 }, { "epoch": 0.628499710368797, "grad_norm": 1.834609031677246, "learning_rate": 8.665859650481693e-05, "loss": 0.6264, "step": 3255 }, { "epoch": 0.6286927978374204, "grad_norm": 1.9082927703857422, "learning_rate": 8.664713540640954e-05, "loss": 0.6216, "step": 3256 }, { "epoch": 0.6288858853060436, "grad_norm": 1.5725789070129395, "learning_rate": 8.663567014581526e-05, "loss": 0.6456, "step": 3257 }, { "epoch": 0.629078972774667, "grad_norm": 2.18243670463562, "learning_rate": 8.662420072433624e-05, "loss": 0.701, "step": 3258 }, { "epoch": 0.6292720602432902, "grad_norm": 1.9350529909133911, "learning_rate": 8.661272714327515e-05, "loss": 0.6434, "step": 3259 }, { "epoch": 0.6294651477119135, "grad_norm": 3.9776594638824463, "learning_rate": 8.660124940393507e-05, "loss": 0.6348, "step": 3260 }, { "epoch": 0.6296582351805368, "grad_norm": 7.849519729614258, "learning_rate": 8.658976750761957e-05, "loss": 0.6475, "step": 3261 }, { "epoch": 0.6298513226491601, "grad_norm": 1.5647157430648804, "learning_rate": 8.657828145563274e-05, "loss": 0.6414, "step": 3262 }, { "epoch": 0.6300444101177833, "grad_norm": 2.2948527336120605, "learning_rate": 8.65667912492791e-05, "loss": 0.597, "step": 3263 }, { "epoch": 0.6302374975864067, "grad_norm": 1.7645807266235352, "learning_rate": 8.65552968898636e-05, "loss": 0.6416, "step": 3264 }, { "epoch": 0.6304305850550299, "grad_norm": 3.5137946605682373, "learning_rate": 8.654379837869176e-05, "loss": 0.6488, "step": 3265 }, { "epoch": 0.6306236725236533, "grad_norm": 1.5748742818832397, "learning_rate": 8.653229571706949e-05, "loss": 0.6734, "step": 3266 }, { "epoch": 0.6308167599922765, "grad_norm": 1.8533300161361694, "learning_rate": 8.652078890630322e-05, "loss": 0.699, "step": 3267 }, { "epoch": 0.6310098474608998, "grad_norm": 1.5888715982437134, "learning_rate": 8.650927794769983e-05, "loss": 0.692, "step": 3268 }, { "epoch": 0.6312029349295231, "grad_norm": 3.1261985301971436, "learning_rate": 8.649776284256666e-05, "loss": 0.6383, "step": 3269 }, { "epoch": 0.6313960223981464, "grad_norm": 1.0034430027008057, "learning_rate": 8.648624359221153e-05, "loss": 0.6483, "step": 3270 }, { "epoch": 0.6315891098667696, "grad_norm": 2.14481258392334, "learning_rate": 8.647472019794277e-05, "loss": 0.6416, "step": 3271 }, { "epoch": 0.631782197335393, "grad_norm": 2.9833486080169678, "learning_rate": 8.646319266106912e-05, "loss": 0.7358, "step": 3272 }, { "epoch": 0.6319752848040162, "grad_norm": 1.7884914875030518, "learning_rate": 8.64516609828998e-05, "loss": 0.6787, "step": 3273 }, { "epoch": 0.6321683722726396, "grad_norm": 3.0023348331451416, "learning_rate": 8.644012516474456e-05, "loss": 0.6631, "step": 3274 }, { "epoch": 0.6323614597412628, "grad_norm": 2.1778481006622314, "learning_rate": 8.642858520791353e-05, "loss": 0.6933, "step": 3275 }, { "epoch": 0.6325545472098861, "grad_norm": 12.405477523803711, "learning_rate": 8.641704111371741e-05, "loss": 0.6802, "step": 3276 }, { "epoch": 0.6327476346785094, "grad_norm": 0.8560047745704651, "learning_rate": 8.640549288346726e-05, "loss": 0.6752, "step": 3277 }, { "epoch": 0.6329407221471327, "grad_norm": 2.00295352935791, "learning_rate": 8.639394051847472e-05, "loss": 0.6891, "step": 3278 }, { "epoch": 0.6331338096157559, "grad_norm": 1.0978856086730957, "learning_rate": 8.638238402005179e-05, "loss": 0.619, "step": 3279 }, { "epoch": 0.6333268970843792, "grad_norm": 0.9469472169876099, "learning_rate": 8.637082338951105e-05, "loss": 0.6424, "step": 3280 }, { "epoch": 0.6335199845530025, "grad_norm": 1.4670889377593994, "learning_rate": 8.635925862816545e-05, "loss": 0.6992, "step": 3281 }, { "epoch": 0.6337130720216257, "grad_norm": 1.4782003164291382, "learning_rate": 8.63476897373285e-05, "loss": 0.6614, "step": 3282 }, { "epoch": 0.6339061594902491, "grad_norm": 0.896179735660553, "learning_rate": 8.633611671831409e-05, "loss": 0.6639, "step": 3283 }, { "epoch": 0.6340992469588723, "grad_norm": 1.2365249395370483, "learning_rate": 8.632453957243664e-05, "loss": 0.7036, "step": 3284 }, { "epoch": 0.6342923344274957, "grad_norm": 1.5312228202819824, "learning_rate": 8.631295830101102e-05, "loss": 0.7404, "step": 3285 }, { "epoch": 0.6344854218961189, "grad_norm": 1.9708843231201172, "learning_rate": 8.630137290535258e-05, "loss": 0.6286, "step": 3286 }, { "epoch": 0.6346785093647422, "grad_norm": 1.8752232789993286, "learning_rate": 8.62897833867771e-05, "loss": 0.6658, "step": 3287 }, { "epoch": 0.6348715968333655, "grad_norm": 0.9592990279197693, "learning_rate": 8.627818974660091e-05, "loss": 0.7064, "step": 3288 }, { "epoch": 0.6350646843019888, "grad_norm": 0.8204653859138489, "learning_rate": 8.62665919861407e-05, "loss": 0.6291, "step": 3289 }, { "epoch": 0.635257771770612, "grad_norm": 0.9981194734573364, "learning_rate": 8.625499010671368e-05, "loss": 0.7187, "step": 3290 }, { "epoch": 0.6354508592392354, "grad_norm": 0.8693974614143372, "learning_rate": 8.624338410963755e-05, "loss": 0.6939, "step": 3291 }, { "epoch": 0.6356439467078586, "grad_norm": 0.8850903511047363, "learning_rate": 8.623177399623048e-05, "loss": 0.6266, "step": 3292 }, { "epoch": 0.635837034176482, "grad_norm": 1.166225552558899, "learning_rate": 8.622015976781105e-05, "loss": 0.6241, "step": 3293 }, { "epoch": 0.6360301216451052, "grad_norm": 3.200059652328491, "learning_rate": 8.620854142569835e-05, "loss": 0.6979, "step": 3294 }, { "epoch": 0.6362232091137285, "grad_norm": 1.0852442979812622, "learning_rate": 8.619691897121194e-05, "loss": 0.6536, "step": 3295 }, { "epoch": 0.6364162965823518, "grad_norm": 2.2888283729553223, "learning_rate": 8.618529240567184e-05, "loss": 0.6596, "step": 3296 }, { "epoch": 0.6366093840509751, "grad_norm": 1.1760133504867554, "learning_rate": 8.617366173039851e-05, "loss": 0.6631, "step": 3297 }, { "epoch": 0.6368024715195983, "grad_norm": 1.0967611074447632, "learning_rate": 8.616202694671292e-05, "loss": 0.6555, "step": 3298 }, { "epoch": 0.6369955589882217, "grad_norm": 1.3728049993515015, "learning_rate": 8.615038805593647e-05, "loss": 0.6014, "step": 3299 }, { "epoch": 0.6371886464568449, "grad_norm": 1.0971211194992065, "learning_rate": 8.613874505939106e-05, "loss": 0.6516, "step": 3300 }, { "epoch": 0.6373817339254683, "grad_norm": 1.034468412399292, "learning_rate": 8.612709795839904e-05, "loss": 0.6543, "step": 3301 }, { "epoch": 0.6375748213940915, "grad_norm": 1.4589797258377075, "learning_rate": 8.611544675428322e-05, "loss": 0.648, "step": 3302 }, { "epoch": 0.6377679088627148, "grad_norm": 1.985140085220337, "learning_rate": 8.610379144836688e-05, "loss": 0.6132, "step": 3303 }, { "epoch": 0.6379609963313381, "grad_norm": 0.9145210981369019, "learning_rate": 8.609213204197378e-05, "loss": 0.7352, "step": 3304 }, { "epoch": 0.6381540837999614, "grad_norm": 0.9023387432098389, "learning_rate": 8.608046853642811e-05, "loss": 0.5992, "step": 3305 }, { "epoch": 0.6383471712685846, "grad_norm": 0.9580155611038208, "learning_rate": 8.606880093305456e-05, "loss": 0.6693, "step": 3306 }, { "epoch": 0.638540258737208, "grad_norm": 0.8862018585205078, "learning_rate": 8.605712923317828e-05, "loss": 0.6015, "step": 3307 }, { "epoch": 0.6387333462058312, "grad_norm": 1.262135624885559, "learning_rate": 8.604545343812486e-05, "loss": 0.6518, "step": 3308 }, { "epoch": 0.6389264336744546, "grad_norm": 1.0655254125595093, "learning_rate": 8.603377354922041e-05, "loss": 0.6423, "step": 3309 }, { "epoch": 0.6391195211430778, "grad_norm": 1.6014984846115112, "learning_rate": 8.602208956779145e-05, "loss": 0.5807, "step": 3310 }, { "epoch": 0.6393126086117011, "grad_norm": 1.4866548776626587, "learning_rate": 8.6010401495165e-05, "loss": 0.6719, "step": 3311 }, { "epoch": 0.6395056960803244, "grad_norm": 0.6761367917060852, "learning_rate": 8.599870933266849e-05, "loss": 0.6072, "step": 3312 }, { "epoch": 0.6396987835489477, "grad_norm": 1.539595603942871, "learning_rate": 8.598701308162988e-05, "loss": 0.6606, "step": 3313 }, { "epoch": 0.639891871017571, "grad_norm": 0.70295649766922, "learning_rate": 8.597531274337757e-05, "loss": 0.6292, "step": 3314 }, { "epoch": 0.6400849584861943, "grad_norm": 2.0396273136138916, "learning_rate": 8.596360831924042e-05, "loss": 0.6498, "step": 3315 }, { "epoch": 0.6402780459548175, "grad_norm": 0.8169893026351929, "learning_rate": 8.595189981054775e-05, "loss": 0.6973, "step": 3316 }, { "epoch": 0.6404711334234409, "grad_norm": 1.1361970901489258, "learning_rate": 8.594018721862936e-05, "loss": 0.6753, "step": 3317 }, { "epoch": 0.6406642208920641, "grad_norm": 1.2157886028289795, "learning_rate": 8.59284705448155e-05, "loss": 0.6727, "step": 3318 }, { "epoch": 0.6408573083606874, "grad_norm": 4.747951030731201, "learning_rate": 8.591674979043687e-05, "loss": 0.5916, "step": 3319 }, { "epoch": 0.6410503958293107, "grad_norm": 2.13156795501709, "learning_rate": 8.590502495682469e-05, "loss": 0.6496, "step": 3320 }, { "epoch": 0.641243483297934, "grad_norm": 0.9995264410972595, "learning_rate": 8.589329604531055e-05, "loss": 0.6046, "step": 3321 }, { "epoch": 0.6414365707665572, "grad_norm": 1.0537298917770386, "learning_rate": 8.588156305722662e-05, "loss": 0.6661, "step": 3322 }, { "epoch": 0.6416296582351806, "grad_norm": 0.9460368752479553, "learning_rate": 8.586982599390542e-05, "loss": 0.7009, "step": 3323 }, { "epoch": 0.6418227457038038, "grad_norm": 1.517331838607788, "learning_rate": 8.585808485668002e-05, "loss": 0.6182, "step": 3324 }, { "epoch": 0.6420158331724272, "grad_norm": 0.6629062294960022, "learning_rate": 8.584633964688389e-05, "loss": 0.6481, "step": 3325 }, { "epoch": 0.6422089206410504, "grad_norm": 0.709954559803009, "learning_rate": 8.583459036585099e-05, "loss": 0.6511, "step": 3326 }, { "epoch": 0.6424020081096736, "grad_norm": 0.6434117555618286, "learning_rate": 8.582283701491576e-05, "loss": 0.6766, "step": 3327 }, { "epoch": 0.642595095578297, "grad_norm": 0.9360913634300232, "learning_rate": 8.581107959541307e-05, "loss": 0.7029, "step": 3328 }, { "epoch": 0.6427881830469202, "grad_norm": 2.2096691131591797, "learning_rate": 8.579931810867826e-05, "loss": 0.6549, "step": 3329 }, { "epoch": 0.6429812705155435, "grad_norm": 0.6613105535507202, "learning_rate": 8.578755255604715e-05, "loss": 0.6613, "step": 3330 }, { "epoch": 0.6431743579841668, "grad_norm": 0.8787453770637512, "learning_rate": 8.5775782938856e-05, "loss": 0.6314, "step": 3331 }, { "epoch": 0.6433674454527901, "grad_norm": 0.961810290813446, "learning_rate": 8.576400925844152e-05, "loss": 0.6996, "step": 3332 }, { "epoch": 0.6435605329214134, "grad_norm": 0.9461787939071655, "learning_rate": 8.575223151614096e-05, "loss": 0.6515, "step": 3333 }, { "epoch": 0.6437536203900367, "grad_norm": 0.6823616623878479, "learning_rate": 8.574044971329192e-05, "loss": 0.6604, "step": 3334 }, { "epoch": 0.6439467078586599, "grad_norm": 0.9833171963691711, "learning_rate": 8.572866385123253e-05, "loss": 0.6461, "step": 3335 }, { "epoch": 0.6441397953272833, "grad_norm": 0.5697477459907532, "learning_rate": 8.57168739313014e-05, "loss": 0.6797, "step": 3336 }, { "epoch": 0.6443328827959065, "grad_norm": 0.6929559707641602, "learning_rate": 8.57050799548375e-05, "loss": 0.5921, "step": 3337 }, { "epoch": 0.6445259702645298, "grad_norm": 0.9340822100639343, "learning_rate": 8.569328192318035e-05, "loss": 0.7087, "step": 3338 }, { "epoch": 0.6447190577331531, "grad_norm": 0.6759512424468994, "learning_rate": 8.568147983766996e-05, "loss": 0.6678, "step": 3339 }, { "epoch": 0.6449121452017764, "grad_norm": 0.7639084458351135, "learning_rate": 8.56696736996467e-05, "loss": 0.614, "step": 3340 }, { "epoch": 0.6451052326703997, "grad_norm": 0.667043924331665, "learning_rate": 8.565786351045144e-05, "loss": 0.6525, "step": 3341 }, { "epoch": 0.645298320139023, "grad_norm": 0.6235578060150146, "learning_rate": 8.564604927142554e-05, "loss": 0.6439, "step": 3342 }, { "epoch": 0.6454914076076462, "grad_norm": 5.249237537384033, "learning_rate": 8.563423098391079e-05, "loss": 0.6771, "step": 3343 }, { "epoch": 0.6456844950762696, "grad_norm": 0.5042905211448669, "learning_rate": 8.562240864924945e-05, "loss": 0.6577, "step": 3344 }, { "epoch": 0.6458775825448928, "grad_norm": 0.8726741075515747, "learning_rate": 8.561058226878426e-05, "loss": 0.618, "step": 3345 }, { "epoch": 0.6460706700135161, "grad_norm": 0.8873999118804932, "learning_rate": 8.559875184385835e-05, "loss": 0.6251, "step": 3346 }, { "epoch": 0.6462637574821394, "grad_norm": 0.7916169166564941, "learning_rate": 8.55869173758154e-05, "loss": 0.7039, "step": 3347 }, { "epoch": 0.6464568449507627, "grad_norm": 0.5638790130615234, "learning_rate": 8.557507886599949e-05, "loss": 0.6799, "step": 3348 }, { "epoch": 0.646649932419386, "grad_norm": 0.5001974105834961, "learning_rate": 8.556323631575518e-05, "loss": 0.6153, "step": 3349 }, { "epoch": 0.6468430198880093, "grad_norm": 8.847085952758789, "learning_rate": 8.555138972642746e-05, "loss": 0.6804, "step": 3350 }, { "epoch": 0.6470361073566325, "grad_norm": 0.8400178551673889, "learning_rate": 8.553953909936183e-05, "loss": 0.6454, "step": 3351 }, { "epoch": 0.6472291948252559, "grad_norm": 1.7774888277053833, "learning_rate": 8.552768443590422e-05, "loss": 0.6132, "step": 3352 }, { "epoch": 0.6474222822938791, "grad_norm": 0.4764457046985626, "learning_rate": 8.5515825737401e-05, "loss": 0.6134, "step": 3353 }, { "epoch": 0.6476153697625024, "grad_norm": 4.7389373779296875, "learning_rate": 8.550396300519905e-05, "loss": 0.6243, "step": 3354 }, { "epoch": 0.6478084572311257, "grad_norm": 0.8063514232635498, "learning_rate": 8.549209624064565e-05, "loss": 0.6312, "step": 3355 }, { "epoch": 0.648001544699749, "grad_norm": 1.0167659521102905, "learning_rate": 8.548022544508856e-05, "loss": 0.653, "step": 3356 }, { "epoch": 0.6481946321683723, "grad_norm": 0.9244803190231323, "learning_rate": 8.546835061987602e-05, "loss": 0.7061, "step": 3357 }, { "epoch": 0.6483877196369956, "grad_norm": 2.786468029022217, "learning_rate": 8.54564717663567e-05, "loss": 0.6319, "step": 3358 }, { "epoch": 0.6485808071056188, "grad_norm": 0.5879203081130981, "learning_rate": 8.544458888587973e-05, "loss": 0.6373, "step": 3359 }, { "epoch": 0.6487738945742422, "grad_norm": 0.6985169053077698, "learning_rate": 8.543270197979473e-05, "loss": 0.5623, "step": 3360 }, { "epoch": 0.6489669820428654, "grad_norm": 0.7252176403999329, "learning_rate": 8.542081104945174e-05, "loss": 0.7393, "step": 3361 }, { "epoch": 0.6491600695114887, "grad_norm": 0.9623334407806396, "learning_rate": 8.540891609620124e-05, "loss": 0.7209, "step": 3362 }, { "epoch": 0.649353156980112, "grad_norm": 0.7198680639266968, "learning_rate": 8.539701712139422e-05, "loss": 0.5943, "step": 3363 }, { "epoch": 0.6495462444487353, "grad_norm": 1.3200215101242065, "learning_rate": 8.538511412638213e-05, "loss": 0.6281, "step": 3364 }, { "epoch": 0.6497393319173586, "grad_norm": 0.8636926412582397, "learning_rate": 8.537320711251679e-05, "loss": 0.6358, "step": 3365 }, { "epoch": 0.6499324193859819, "grad_norm": 0.9461154341697693, "learning_rate": 8.53612960811506e-05, "loss": 0.6442, "step": 3366 }, { "epoch": 0.6501255068546051, "grad_norm": 0.7437255382537842, "learning_rate": 8.534938103363628e-05, "loss": 0.5969, "step": 3367 }, { "epoch": 0.6503185943232285, "grad_norm": 1.1994398832321167, "learning_rate": 8.533746197132715e-05, "loss": 0.6474, "step": 3368 }, { "epoch": 0.6505116817918517, "grad_norm": 1.220316767692566, "learning_rate": 8.532553889557684e-05, "loss": 0.6759, "step": 3369 }, { "epoch": 0.650704769260475, "grad_norm": 0.8435543775558472, "learning_rate": 8.531361180773958e-05, "loss": 0.6615, "step": 3370 }, { "epoch": 0.6508978567290983, "grad_norm": 0.8788073062896729, "learning_rate": 8.530168070916996e-05, "loss": 0.6383, "step": 3371 }, { "epoch": 0.6510909441977216, "grad_norm": 0.7600183486938477, "learning_rate": 8.5289745601223e-05, "loss": 0.74, "step": 3372 }, { "epoch": 0.6512840316663449, "grad_norm": 0.649832010269165, "learning_rate": 8.52778064852543e-05, "loss": 0.6508, "step": 3373 }, { "epoch": 0.6514771191349681, "grad_norm": 1.0251693725585938, "learning_rate": 8.526586336261983e-05, "loss": 0.5978, "step": 3374 }, { "epoch": 0.6516702066035914, "grad_norm": 0.7723473310470581, "learning_rate": 8.525391623467598e-05, "loss": 0.6752, "step": 3375 }, { "epoch": 0.6518632940722147, "grad_norm": 0.7884180545806885, "learning_rate": 8.524196510277968e-05, "loss": 0.6908, "step": 3376 }, { "epoch": 0.652056381540838, "grad_norm": 3.088862180709839, "learning_rate": 8.523000996828828e-05, "loss": 0.6683, "step": 3377 }, { "epoch": 0.6522494690094612, "grad_norm": 0.7200843691825867, "learning_rate": 8.521805083255954e-05, "loss": 0.626, "step": 3378 }, { "epoch": 0.6524425564780846, "grad_norm": 0.7666516900062561, "learning_rate": 8.520608769695175e-05, "loss": 0.6859, "step": 3379 }, { "epoch": 0.6526356439467078, "grad_norm": 2.5187177658081055, "learning_rate": 8.519412056282364e-05, "loss": 0.6329, "step": 3380 }, { "epoch": 0.6528287314153312, "grad_norm": 3.5741195678710938, "learning_rate": 8.518214943153431e-05, "loss": 0.6625, "step": 3381 }, { "epoch": 0.6530218188839544, "grad_norm": 1.0900450944900513, "learning_rate": 8.517017430444341e-05, "loss": 0.7073, "step": 3382 }, { "epoch": 0.6532149063525777, "grad_norm": 0.5969710946083069, "learning_rate": 8.515819518291104e-05, "loss": 0.6791, "step": 3383 }, { "epoch": 0.653407993821201, "grad_norm": 0.7789936065673828, "learning_rate": 8.514621206829767e-05, "loss": 0.655, "step": 3384 }, { "epoch": 0.6536010812898243, "grad_norm": 2.3005921840667725, "learning_rate": 8.513422496196433e-05, "loss": 0.6093, "step": 3385 }, { "epoch": 0.6537941687584475, "grad_norm": 0.6099730730056763, "learning_rate": 8.512223386527242e-05, "loss": 0.7078, "step": 3386 }, { "epoch": 0.6539872562270709, "grad_norm": 0.5630918741226196, "learning_rate": 8.511023877958382e-05, "loss": 0.5918, "step": 3387 }, { "epoch": 0.6541803436956941, "grad_norm": 0.9082244038581848, "learning_rate": 8.50982397062609e-05, "loss": 0.6751, "step": 3388 }, { "epoch": 0.6543734311643175, "grad_norm": 1.2340428829193115, "learning_rate": 8.508623664666643e-05, "loss": 0.6349, "step": 3389 }, { "epoch": 0.6545665186329407, "grad_norm": 0.7069430947303772, "learning_rate": 8.507422960216365e-05, "loss": 0.6397, "step": 3390 }, { "epoch": 0.654759606101564, "grad_norm": 0.9022899866104126, "learning_rate": 8.506221857411626e-05, "loss": 0.6717, "step": 3391 }, { "epoch": 0.6549526935701873, "grad_norm": 7.249198913574219, "learning_rate": 8.505020356388842e-05, "loss": 0.6634, "step": 3392 }, { "epoch": 0.6551457810388106, "grad_norm": 1.0880849361419678, "learning_rate": 8.503818457284473e-05, "loss": 0.6714, "step": 3393 }, { "epoch": 0.6553388685074338, "grad_norm": 0.7083918452262878, "learning_rate": 8.502616160235022e-05, "loss": 0.6626, "step": 3394 }, { "epoch": 0.6555319559760572, "grad_norm": 0.7051200866699219, "learning_rate": 8.501413465377042e-05, "loss": 0.663, "step": 3395 }, { "epoch": 0.6557250434446804, "grad_norm": 0.8179313540458679, "learning_rate": 8.500210372847127e-05, "loss": 0.6326, "step": 3396 }, { "epoch": 0.6559181309133038, "grad_norm": 0.8857004046440125, "learning_rate": 8.499006882781919e-05, "loss": 0.6288, "step": 3397 }, { "epoch": 0.656111218381927, "grad_norm": 0.6863351464271545, "learning_rate": 8.497802995318105e-05, "loss": 0.7133, "step": 3398 }, { "epoch": 0.6563043058505503, "grad_norm": 0.7269207239151001, "learning_rate": 8.496598710592412e-05, "loss": 0.6071, "step": 3399 }, { "epoch": 0.6564973933191736, "grad_norm": 0.6173771023750305, "learning_rate": 8.49539402874162e-05, "loss": 0.6342, "step": 3400 }, { "epoch": 0.6566904807877969, "grad_norm": 0.9016851186752319, "learning_rate": 8.49418894990255e-05, "loss": 0.6836, "step": 3401 }, { "epoch": 0.6568835682564201, "grad_norm": 0.6132490038871765, "learning_rate": 8.492983474212068e-05, "loss": 0.6129, "step": 3402 }, { "epoch": 0.6570766557250435, "grad_norm": 0.9015952348709106, "learning_rate": 8.491777601807086e-05, "loss": 0.6584, "step": 3403 }, { "epoch": 0.6572697431936667, "grad_norm": 1.5805414915084839, "learning_rate": 8.490571332824558e-05, "loss": 0.6098, "step": 3404 }, { "epoch": 0.65746283066229, "grad_norm": 1.0279536247253418, "learning_rate": 8.489364667401489e-05, "loss": 0.6551, "step": 3405 }, { "epoch": 0.6576559181309133, "grad_norm": 0.8513419032096863, "learning_rate": 8.488157605674925e-05, "loss": 0.64, "step": 3406 }, { "epoch": 0.6578490055995366, "grad_norm": 1.6279512643814087, "learning_rate": 8.486950147781955e-05, "loss": 0.6204, "step": 3407 }, { "epoch": 0.6580420930681599, "grad_norm": 0.7097526788711548, "learning_rate": 8.485742293859718e-05, "loss": 0.6873, "step": 3408 }, { "epoch": 0.6582351805367832, "grad_norm": 1.0809475183486938, "learning_rate": 8.484534044045396e-05, "loss": 0.7189, "step": 3409 }, { "epoch": 0.6584282680054064, "grad_norm": 1.5734387636184692, "learning_rate": 8.483325398476214e-05, "loss": 0.5942, "step": 3410 }, { "epoch": 0.6586213554740298, "grad_norm": 0.5966981053352356, "learning_rate": 8.482116357289444e-05, "loss": 0.6343, "step": 3411 }, { "epoch": 0.658814442942653, "grad_norm": 0.8603651523590088, "learning_rate": 8.480906920622402e-05, "loss": 0.696, "step": 3412 }, { "epoch": 0.6590075304112764, "grad_norm": 2.463632583618164, "learning_rate": 8.479697088612451e-05, "loss": 0.6202, "step": 3413 }, { "epoch": 0.6592006178798996, "grad_norm": 0.5020148754119873, "learning_rate": 8.478486861396997e-05, "loss": 0.6059, "step": 3414 }, { "epoch": 0.6593937053485229, "grad_norm": 0.8453328609466553, "learning_rate": 8.47727623911349e-05, "loss": 0.6759, "step": 3415 }, { "epoch": 0.6595867928171462, "grad_norm": 0.6505757570266724, "learning_rate": 8.476065221899428e-05, "loss": 0.6402, "step": 3416 }, { "epoch": 0.6597798802857695, "grad_norm": 0.6301429271697998, "learning_rate": 8.474853809892349e-05, "loss": 0.7034, "step": 3417 }, { "epoch": 0.6599729677543927, "grad_norm": 0.5812699794769287, "learning_rate": 8.473642003229841e-05, "loss": 0.6354, "step": 3418 }, { "epoch": 0.6601660552230161, "grad_norm": 1.3829952478408813, "learning_rate": 8.472429802049532e-05, "loss": 0.6315, "step": 3419 }, { "epoch": 0.6603591426916393, "grad_norm": 0.62151700258255, "learning_rate": 8.471217206489102e-05, "loss": 0.658, "step": 3420 }, { "epoch": 0.6605522301602625, "grad_norm": 0.9728379249572754, "learning_rate": 8.470004216686267e-05, "loss": 0.6706, "step": 3421 }, { "epoch": 0.6607453176288859, "grad_norm": 0.6585241556167603, "learning_rate": 8.468790832778794e-05, "loss": 0.6314, "step": 3422 }, { "epoch": 0.6609384050975091, "grad_norm": 0.5728259682655334, "learning_rate": 8.467577054904492e-05, "loss": 0.637, "step": 3423 }, { "epoch": 0.6611314925661325, "grad_norm": 1.7655245065689087, "learning_rate": 8.466362883201214e-05, "loss": 0.6331, "step": 3424 }, { "epoch": 0.6613245800347557, "grad_norm": 0.604015052318573, "learning_rate": 8.465148317806861e-05, "loss": 0.7136, "step": 3425 }, { "epoch": 0.661517667503379, "grad_norm": 0.4535538852214813, "learning_rate": 8.463933358859379e-05, "loss": 0.7079, "step": 3426 }, { "epoch": 0.6617107549720023, "grad_norm": 0.616756021976471, "learning_rate": 8.462718006496751e-05, "loss": 0.6614, "step": 3427 }, { "epoch": 0.6619038424406256, "grad_norm": 0.5345838069915771, "learning_rate": 8.461502260857014e-05, "loss": 0.6776, "step": 3428 }, { "epoch": 0.6620969299092488, "grad_norm": 0.599148690700531, "learning_rate": 8.460286122078247e-05, "loss": 0.7061, "step": 3429 }, { "epoch": 0.6622900173778722, "grad_norm": 1.2474433183670044, "learning_rate": 8.45906959029857e-05, "loss": 0.6576, "step": 3430 }, { "epoch": 0.6624831048464954, "grad_norm": 0.6164513826370239, "learning_rate": 8.457852665656149e-05, "loss": 0.688, "step": 3431 }, { "epoch": 0.6626761923151188, "grad_norm": 0.8112490177154541, "learning_rate": 8.456635348289201e-05, "loss": 0.6705, "step": 3432 }, { "epoch": 0.662869279783742, "grad_norm": 0.5162287950515747, "learning_rate": 8.455417638335977e-05, "loss": 0.6657, "step": 3433 }, { "epoch": 0.6630623672523653, "grad_norm": 0.6437657475471497, "learning_rate": 8.454199535934782e-05, "loss": 0.6553, "step": 3434 }, { "epoch": 0.6632554547209886, "grad_norm": 0.7405962944030762, "learning_rate": 8.45298104122396e-05, "loss": 0.6745, "step": 3435 }, { "epoch": 0.6634485421896119, "grad_norm": 0.6823340654373169, "learning_rate": 8.4517621543419e-05, "loss": 0.6565, "step": 3436 }, { "epoch": 0.6636416296582351, "grad_norm": 0.9299601316452026, "learning_rate": 8.450542875427039e-05, "loss": 0.7196, "step": 3437 }, { "epoch": 0.6638347171268585, "grad_norm": 1.2429884672164917, "learning_rate": 8.449323204617853e-05, "loss": 0.6284, "step": 3438 }, { "epoch": 0.6640278045954817, "grad_norm": 0.7312325239181519, "learning_rate": 8.448103142052869e-05, "loss": 0.6651, "step": 3439 }, { "epoch": 0.6642208920641051, "grad_norm": 0.5148894190788269, "learning_rate": 8.446882687870654e-05, "loss": 0.6634, "step": 3440 }, { "epoch": 0.6644139795327283, "grad_norm": 0.4168872535228729, "learning_rate": 8.44566184220982e-05, "loss": 0.6254, "step": 3441 }, { "epoch": 0.6646070670013516, "grad_norm": 0.6860392689704895, "learning_rate": 8.444440605209026e-05, "loss": 0.6596, "step": 3442 }, { "epoch": 0.6648001544699749, "grad_norm": 0.3927261233329773, "learning_rate": 8.443218977006973e-05, "loss": 0.6245, "step": 3443 }, { "epoch": 0.6649932419385982, "grad_norm": 0.5144070982933044, "learning_rate": 8.441996957742405e-05, "loss": 0.6604, "step": 3444 }, { "epoch": 0.6651863294072214, "grad_norm": 0.44267338514328003, "learning_rate": 8.440774547554114e-05, "loss": 0.7107, "step": 3445 }, { "epoch": 0.6653794168758448, "grad_norm": 0.46276187896728516, "learning_rate": 8.439551746580936e-05, "loss": 0.6927, "step": 3446 }, { "epoch": 0.665572504344468, "grad_norm": 0.7427705526351929, "learning_rate": 8.438328554961747e-05, "loss": 0.6778, "step": 3447 }, { "epoch": 0.6657655918130914, "grad_norm": 0.560741126537323, "learning_rate": 8.437104972835474e-05, "loss": 0.6391, "step": 3448 }, { "epoch": 0.6659586792817146, "grad_norm": 0.4868205785751343, "learning_rate": 8.435881000341084e-05, "loss": 0.6047, "step": 3449 }, { "epoch": 0.6661517667503379, "grad_norm": 0.5068907737731934, "learning_rate": 8.434656637617588e-05, "loss": 0.5724, "step": 3450 }, { "epoch": 0.6663448542189612, "grad_norm": 0.5235016345977783, "learning_rate": 8.433431884804045e-05, "loss": 0.5911, "step": 3451 }, { "epoch": 0.6665379416875845, "grad_norm": 0.4605424106121063, "learning_rate": 8.432206742039553e-05, "loss": 0.6345, "step": 3452 }, { "epoch": 0.6667310291562077, "grad_norm": 0.5662600994110107, "learning_rate": 8.430981209463258e-05, "loss": 0.6458, "step": 3453 }, { "epoch": 0.6669241166248311, "grad_norm": 0.8979607820510864, "learning_rate": 8.429755287214353e-05, "loss": 0.6053, "step": 3454 }, { "epoch": 0.6671172040934543, "grad_norm": 0.6197887063026428, "learning_rate": 8.428528975432066e-05, "loss": 0.7064, "step": 3455 }, { "epoch": 0.6673102915620777, "grad_norm": 0.5742630362510681, "learning_rate": 8.427302274255679e-05, "loss": 0.6222, "step": 3456 }, { "epoch": 0.6675033790307009, "grad_norm": 0.5542461276054382, "learning_rate": 8.426075183824513e-05, "loss": 0.6096, "step": 3457 }, { "epoch": 0.6676964664993242, "grad_norm": 0.4555065333843231, "learning_rate": 8.424847704277934e-05, "loss": 0.6796, "step": 3458 }, { "epoch": 0.6678895539679475, "grad_norm": 0.4455723166465759, "learning_rate": 8.423619835755352e-05, "loss": 0.6263, "step": 3459 }, { "epoch": 0.6680826414365708, "grad_norm": 0.4716097414493561, "learning_rate": 8.422391578396225e-05, "loss": 0.5912, "step": 3460 }, { "epoch": 0.668275728905194, "grad_norm": 0.8365013003349304, "learning_rate": 8.421162932340049e-05, "loss": 0.6368, "step": 3461 }, { "epoch": 0.6684688163738174, "grad_norm": 0.5524376034736633, "learning_rate": 8.419933897726367e-05, "loss": 0.6666, "step": 3462 }, { "epoch": 0.6686619038424406, "grad_norm": 0.5370798707008362, "learning_rate": 8.418704474694767e-05, "loss": 0.6517, "step": 3463 }, { "epoch": 0.668854991311064, "grad_norm": 0.4459575116634369, "learning_rate": 8.417474663384882e-05, "loss": 0.6616, "step": 3464 }, { "epoch": 0.6690480787796872, "grad_norm": 0.6099815368652344, "learning_rate": 8.416244463936385e-05, "loss": 0.6839, "step": 3465 }, { "epoch": 0.6692411662483105, "grad_norm": 0.6490288972854614, "learning_rate": 8.415013876488996e-05, "loss": 0.6701, "step": 3466 }, { "epoch": 0.6694342537169338, "grad_norm": 0.5746615529060364, "learning_rate": 8.413782901182479e-05, "loss": 0.696, "step": 3467 }, { "epoch": 0.6696273411855571, "grad_norm": 2.425450086593628, "learning_rate": 8.412551538156642e-05, "loss": 0.6701, "step": 3468 }, { "epoch": 0.6698204286541803, "grad_norm": 0.5223603844642639, "learning_rate": 8.411319787551336e-05, "loss": 0.6335, "step": 3469 }, { "epoch": 0.6700135161228036, "grad_norm": 0.4935591220855713, "learning_rate": 8.410087649506457e-05, "loss": 0.6074, "step": 3470 }, { "epoch": 0.6702066035914269, "grad_norm": 0.4450218379497528, "learning_rate": 8.408855124161943e-05, "loss": 0.6149, "step": 3471 }, { "epoch": 0.6703996910600502, "grad_norm": 0.5554332733154297, "learning_rate": 8.407622211657782e-05, "loss": 0.6666, "step": 3472 }, { "epoch": 0.6705927785286735, "grad_norm": 0.5977007150650024, "learning_rate": 8.406388912133997e-05, "loss": 0.6282, "step": 3473 }, { "epoch": 0.6707858659972967, "grad_norm": 0.668582022190094, "learning_rate": 8.405155225730663e-05, "loss": 0.6246, "step": 3474 }, { "epoch": 0.6709789534659201, "grad_norm": 0.5034180879592896, "learning_rate": 8.403921152587892e-05, "loss": 0.6614, "step": 3475 }, { "epoch": 0.6711720409345433, "grad_norm": 0.5634800791740417, "learning_rate": 8.402686692845848e-05, "loss": 0.6388, "step": 3476 }, { "epoch": 0.6713651284031666, "grad_norm": 0.50815349817276, "learning_rate": 8.40145184664473e-05, "loss": 0.662, "step": 3477 }, { "epoch": 0.6715582158717899, "grad_norm": 0.4777405858039856, "learning_rate": 8.40021661412479e-05, "loss": 0.6238, "step": 3478 }, { "epoch": 0.6717513033404132, "grad_norm": 0.6024067401885986, "learning_rate": 8.398980995426314e-05, "loss": 0.6049, "step": 3479 }, { "epoch": 0.6719443908090365, "grad_norm": 0.5078209638595581, "learning_rate": 8.397744990689639e-05, "loss": 0.6739, "step": 3480 }, { "epoch": 0.6721374782776598, "grad_norm": 0.6376562118530273, "learning_rate": 8.396508600055143e-05, "loss": 0.6321, "step": 3481 }, { "epoch": 0.672330565746283, "grad_norm": 0.6177651882171631, "learning_rate": 8.395271823663253e-05, "loss": 0.6726, "step": 3482 }, { "epoch": 0.6725236532149064, "grad_norm": 0.4813728332519531, "learning_rate": 8.394034661654432e-05, "loss": 0.728, "step": 3483 }, { "epoch": 0.6727167406835296, "grad_norm": 0.5973758101463318, "learning_rate": 8.39279711416919e-05, "loss": 0.661, "step": 3484 }, { "epoch": 0.672909828152153, "grad_norm": 0.47708237171173096, "learning_rate": 8.391559181348082e-05, "loss": 0.6526, "step": 3485 }, { "epoch": 0.6731029156207762, "grad_norm": 0.45966827869415283, "learning_rate": 8.390320863331704e-05, "loss": 0.6195, "step": 3486 }, { "epoch": 0.6732960030893995, "grad_norm": 0.6572937369346619, "learning_rate": 8.389082160260699e-05, "loss": 0.5867, "step": 3487 }, { "epoch": 0.6734890905580228, "grad_norm": 0.4978322982788086, "learning_rate": 8.387843072275754e-05, "loss": 0.6728, "step": 3488 }, { "epoch": 0.6736821780266461, "grad_norm": 1.022752046585083, "learning_rate": 8.386603599517597e-05, "loss": 0.6027, "step": 3489 }, { "epoch": 0.6738752654952693, "grad_norm": 0.6387626528739929, "learning_rate": 8.385363742127e-05, "loss": 0.6385, "step": 3490 }, { "epoch": 0.6740683529638927, "grad_norm": 1.057703971862793, "learning_rate": 8.384123500244778e-05, "loss": 0.606, "step": 3491 }, { "epoch": 0.6742614404325159, "grad_norm": 1.0486280918121338, "learning_rate": 8.382882874011795e-05, "loss": 0.7252, "step": 3492 }, { "epoch": 0.6744545279011392, "grad_norm": 0.653166651725769, "learning_rate": 8.38164186356895e-05, "loss": 0.6926, "step": 3493 }, { "epoch": 0.6746476153697625, "grad_norm": 0.8039303421974182, "learning_rate": 8.380400469057195e-05, "loss": 0.6525, "step": 3494 }, { "epoch": 0.6748407028383858, "grad_norm": 0.7553034424781799, "learning_rate": 8.379158690617517e-05, "loss": 0.6999, "step": 3495 }, { "epoch": 0.675033790307009, "grad_norm": 0.5951066613197327, "learning_rate": 8.377916528390952e-05, "loss": 0.719, "step": 3496 }, { "epoch": 0.6752268777756324, "grad_norm": 0.6840483546257019, "learning_rate": 8.376673982518581e-05, "loss": 0.667, "step": 3497 }, { "epoch": 0.6754199652442556, "grad_norm": 0.6395536065101624, "learning_rate": 8.375431053141521e-05, "loss": 0.6188, "step": 3498 }, { "epoch": 0.675613052712879, "grad_norm": 0.7987133860588074, "learning_rate": 8.37418774040094e-05, "loss": 0.6823, "step": 3499 }, { "epoch": 0.6758061401815022, "grad_norm": 0.6822597980499268, "learning_rate": 8.37294404443805e-05, "loss": 0.6472, "step": 3500 }, { "epoch": 0.6758061401815022, "eval_loss": 0.6890086531639099, "eval_runtime": 49.4825, "eval_samples_per_second": 13.419, "eval_steps_per_second": 0.424, "step": 3500 }, { "epoch": 0.6759992276501255, "grad_norm": 0.7671290636062622, "learning_rate": 8.371699965394095e-05, "loss": 0.6556, "step": 3501 }, { "epoch": 0.6761923151187488, "grad_norm": 0.8310761451721191, "learning_rate": 8.37045550341038e-05, "loss": 0.6141, "step": 3502 }, { "epoch": 0.6763854025873721, "grad_norm": 1.4498947858810425, "learning_rate": 8.369210658628237e-05, "loss": 0.625, "step": 3503 }, { "epoch": 0.6765784900559954, "grad_norm": 0.9981981515884399, "learning_rate": 8.367965431189053e-05, "loss": 0.6088, "step": 3504 }, { "epoch": 0.6767715775246187, "grad_norm": 0.701020359992981, "learning_rate": 8.366719821234252e-05, "loss": 0.6249, "step": 3505 }, { "epoch": 0.6769646649932419, "grad_norm": 0.9404657483100891, "learning_rate": 8.365473828905308e-05, "loss": 0.6406, "step": 3506 }, { "epoch": 0.6771577524618653, "grad_norm": 2.9595088958740234, "learning_rate": 8.36422745434373e-05, "loss": 0.6759, "step": 3507 }, { "epoch": 0.6773508399304885, "grad_norm": 0.8404605984687805, "learning_rate": 8.362980697691075e-05, "loss": 0.6325, "step": 3508 }, { "epoch": 0.6775439273991118, "grad_norm": 0.8771991729736328, "learning_rate": 8.361733559088945e-05, "loss": 0.6597, "step": 3509 }, { "epoch": 0.6777370148677351, "grad_norm": 0.7535029053688049, "learning_rate": 8.36048603867898e-05, "loss": 0.5906, "step": 3510 }, { "epoch": 0.6779301023363584, "grad_norm": 0.6077292561531067, "learning_rate": 8.359238136602872e-05, "loss": 0.646, "step": 3511 }, { "epoch": 0.6781231898049817, "grad_norm": 0.8291327953338623, "learning_rate": 8.357989853002346e-05, "loss": 0.6079, "step": 3512 }, { "epoch": 0.678316277273605, "grad_norm": 0.7044786810874939, "learning_rate": 8.356741188019179e-05, "loss": 0.6799, "step": 3513 }, { "epoch": 0.6785093647422282, "grad_norm": 2.155776262283325, "learning_rate": 8.355492141795185e-05, "loss": 0.6352, "step": 3514 }, { "epoch": 0.6787024522108516, "grad_norm": 0.6713203191757202, "learning_rate": 8.354242714472224e-05, "loss": 0.6262, "step": 3515 }, { "epoch": 0.6788955396794748, "grad_norm": 0.5369341373443604, "learning_rate": 8.352992906192203e-05, "loss": 0.5943, "step": 3516 }, { "epoch": 0.679088627148098, "grad_norm": 0.6555454730987549, "learning_rate": 8.351742717097067e-05, "loss": 0.6488, "step": 3517 }, { "epoch": 0.6792817146167214, "grad_norm": 0.7901085615158081, "learning_rate": 8.3504921473288e-05, "loss": 0.6262, "step": 3518 }, { "epoch": 0.6794748020853446, "grad_norm": 1.0451081991195679, "learning_rate": 8.349241197029445e-05, "loss": 0.6379, "step": 3519 }, { "epoch": 0.679667889553968, "grad_norm": 0.6602562665939331, "learning_rate": 8.347989866341073e-05, "loss": 0.6369, "step": 3520 }, { "epoch": 0.6798609770225912, "grad_norm": 0.9767576456069946, "learning_rate": 8.346738155405801e-05, "loss": 0.6083, "step": 3521 }, { "epoch": 0.6800540644912145, "grad_norm": 0.8965598940849304, "learning_rate": 8.345486064365795e-05, "loss": 0.663, "step": 3522 }, { "epoch": 0.6802471519598378, "grad_norm": 1.921337366104126, "learning_rate": 8.344233593363261e-05, "loss": 0.6602, "step": 3523 }, { "epoch": 0.6804402394284611, "grad_norm": 1.4643992185592651, "learning_rate": 8.34298074254045e-05, "loss": 0.6447, "step": 3524 }, { "epoch": 0.6806333268970843, "grad_norm": 0.6689268946647644, "learning_rate": 8.341727512039648e-05, "loss": 0.6461, "step": 3525 }, { "epoch": 0.6808264143657077, "grad_norm": 1.4770184755325317, "learning_rate": 8.340473902003195e-05, "loss": 0.636, "step": 3526 }, { "epoch": 0.6810195018343309, "grad_norm": 0.7377784848213196, "learning_rate": 8.339219912573468e-05, "loss": 0.6114, "step": 3527 }, { "epoch": 0.6812125893029543, "grad_norm": 2.565882682800293, "learning_rate": 8.337965543892888e-05, "loss": 0.6083, "step": 3528 }, { "epoch": 0.6814056767715775, "grad_norm": 0.639768660068512, "learning_rate": 8.336710796103921e-05, "loss": 0.5998, "step": 3529 }, { "epoch": 0.6815987642402008, "grad_norm": 1.3130513429641724, "learning_rate": 8.335455669349073e-05, "loss": 0.7185, "step": 3530 }, { "epoch": 0.6817918517088241, "grad_norm": 0.9792224168777466, "learning_rate": 8.334200163770896e-05, "loss": 0.6577, "step": 3531 }, { "epoch": 0.6819849391774474, "grad_norm": 1.4199856519699097, "learning_rate": 8.332944279511983e-05, "loss": 0.6648, "step": 3532 }, { "epoch": 0.6821780266460706, "grad_norm": 1.5058040618896484, "learning_rate": 8.33168801671497e-05, "loss": 0.6023, "step": 3533 }, { "epoch": 0.682371114114694, "grad_norm": 1.8345555067062378, "learning_rate": 8.330431375522539e-05, "loss": 0.656, "step": 3534 }, { "epoch": 0.6825642015833172, "grad_norm": 1.4188250303268433, "learning_rate": 8.329174356077407e-05, "loss": 0.6593, "step": 3535 }, { "epoch": 0.6827572890519406, "grad_norm": 1.6394563913345337, "learning_rate": 8.327916958522345e-05, "loss": 0.637, "step": 3536 }, { "epoch": 0.6829503765205638, "grad_norm": 0.680280864238739, "learning_rate": 8.326659183000163e-05, "loss": 0.6216, "step": 3537 }, { "epoch": 0.6831434639891871, "grad_norm": 1.439386010169983, "learning_rate": 8.325401029653706e-05, "loss": 0.6623, "step": 3538 }, { "epoch": 0.6833365514578104, "grad_norm": 0.9061694741249084, "learning_rate": 8.324142498625873e-05, "loss": 0.6342, "step": 3539 }, { "epoch": 0.6835296389264337, "grad_norm": 0.80137038230896, "learning_rate": 8.322883590059601e-05, "loss": 0.6322, "step": 3540 }, { "epoch": 0.6837227263950569, "grad_norm": 0.8870419263839722, "learning_rate": 8.321624304097868e-05, "loss": 0.671, "step": 3541 }, { "epoch": 0.6839158138636803, "grad_norm": 1.0196501016616821, "learning_rate": 8.320364640883699e-05, "loss": 0.5877, "step": 3542 }, { "epoch": 0.6841089013323035, "grad_norm": 0.8501062393188477, "learning_rate": 8.319104600560159e-05, "loss": 0.6353, "step": 3543 }, { "epoch": 0.6843019888009269, "grad_norm": 0.7758103609085083, "learning_rate": 8.317844183270357e-05, "loss": 0.6706, "step": 3544 }, { "epoch": 0.6844950762695501, "grad_norm": 0.655392050743103, "learning_rate": 8.316583389157444e-05, "loss": 0.6543, "step": 3545 }, { "epoch": 0.6846881637381734, "grad_norm": 1.096468210220337, "learning_rate": 8.315322218364615e-05, "loss": 0.6118, "step": 3546 }, { "epoch": 0.6848812512067967, "grad_norm": 1.3181101083755493, "learning_rate": 8.314060671035106e-05, "loss": 0.6283, "step": 3547 }, { "epoch": 0.68507433867542, "grad_norm": 0.81305330991745, "learning_rate": 8.312798747312198e-05, "loss": 0.624, "step": 3548 }, { "epoch": 0.6852674261440432, "grad_norm": 0.6955148577690125, "learning_rate": 8.311536447339213e-05, "loss": 0.6592, "step": 3549 }, { "epoch": 0.6854605136126666, "grad_norm": 1.0258804559707642, "learning_rate": 8.310273771259516e-05, "loss": 0.6846, "step": 3550 }, { "epoch": 0.6856536010812898, "grad_norm": 0.7014469504356384, "learning_rate": 8.309010719216517e-05, "loss": 0.6235, "step": 3551 }, { "epoch": 0.6858466885499132, "grad_norm": 1.2410149574279785, "learning_rate": 8.307747291353664e-05, "loss": 0.6484, "step": 3552 }, { "epoch": 0.6860397760185364, "grad_norm": 0.7122513651847839, "learning_rate": 8.306483487814451e-05, "loss": 0.6095, "step": 3553 }, { "epoch": 0.6862328634871597, "grad_norm": 2.0254411697387695, "learning_rate": 8.305219308742416e-05, "loss": 0.6758, "step": 3554 }, { "epoch": 0.686425950955783, "grad_norm": 0.79982590675354, "learning_rate": 8.303954754281136e-05, "loss": 0.6735, "step": 3555 }, { "epoch": 0.6866190384244063, "grad_norm": 0.8499089479446411, "learning_rate": 8.302689824574233e-05, "loss": 0.6343, "step": 3556 }, { "epoch": 0.6868121258930295, "grad_norm": 1.0672489404678345, "learning_rate": 8.30142451976537e-05, "loss": 0.7058, "step": 3557 }, { "epoch": 0.6870052133616529, "grad_norm": 1.4598273038864136, "learning_rate": 8.300158839998255e-05, "loss": 0.641, "step": 3558 }, { "epoch": 0.6871983008302761, "grad_norm": 0.9788135886192322, "learning_rate": 8.298892785416637e-05, "loss": 0.7198, "step": 3559 }, { "epoch": 0.6873913882988995, "grad_norm": 1.1747958660125732, "learning_rate": 8.297626356164306e-05, "loss": 0.6309, "step": 3560 }, { "epoch": 0.6875844757675227, "grad_norm": 0.9481674432754517, "learning_rate": 8.296359552385099e-05, "loss": 0.6315, "step": 3561 }, { "epoch": 0.687777563236146, "grad_norm": 2.0165927410125732, "learning_rate": 8.29509237422289e-05, "loss": 0.6847, "step": 3562 }, { "epoch": 0.6879706507047693, "grad_norm": 4.107883453369141, "learning_rate": 8.293824821821603e-05, "loss": 0.6554, "step": 3563 }, { "epoch": 0.6881637381733925, "grad_norm": 1.1194607019424438, "learning_rate": 8.292556895325194e-05, "loss": 0.613, "step": 3564 }, { "epoch": 0.6883568256420158, "grad_norm": 2.1788578033447266, "learning_rate": 8.291288594877671e-05, "loss": 0.6243, "step": 3565 }, { "epoch": 0.6885499131106391, "grad_norm": 2.1705474853515625, "learning_rate": 8.29001992062308e-05, "loss": 0.6548, "step": 3566 }, { "epoch": 0.6887430005792624, "grad_norm": 1.1483889818191528, "learning_rate": 8.288750872705509e-05, "loss": 0.6858, "step": 3567 }, { "epoch": 0.6889360880478856, "grad_norm": 1.3088964223861694, "learning_rate": 8.287481451269093e-05, "loss": 0.6323, "step": 3568 }, { "epoch": 0.689129175516509, "grad_norm": 1.2735507488250732, "learning_rate": 8.286211656458002e-05, "loss": 0.6811, "step": 3569 }, { "epoch": 0.6893222629851322, "grad_norm": 1.1305747032165527, "learning_rate": 8.284941488416456e-05, "loss": 0.6314, "step": 3570 }, { "epoch": 0.6895153504537556, "grad_norm": 1.3031160831451416, "learning_rate": 8.283670947288711e-05, "loss": 0.6207, "step": 3571 }, { "epoch": 0.6897084379223788, "grad_norm": 1.8756405115127563, "learning_rate": 8.282400033219071e-05, "loss": 0.6234, "step": 3572 }, { "epoch": 0.6899015253910021, "grad_norm": 1.1641852855682373, "learning_rate": 8.281128746351879e-05, "loss": 0.7051, "step": 3573 }, { "epoch": 0.6900946128596254, "grad_norm": 1.4700734615325928, "learning_rate": 8.27985708683152e-05, "loss": 0.5895, "step": 3574 }, { "epoch": 0.6902877003282487, "grad_norm": 1.05450439453125, "learning_rate": 8.278585054802422e-05, "loss": 0.6367, "step": 3575 }, { "epoch": 0.6904807877968719, "grad_norm": 2.3665876388549805, "learning_rate": 8.277312650409057e-05, "loss": 0.6493, "step": 3576 }, { "epoch": 0.6906738752654953, "grad_norm": 1.855770230293274, "learning_rate": 8.276039873795937e-05, "loss": 0.5542, "step": 3577 }, { "epoch": 0.6908669627341185, "grad_norm": 1.0987008810043335, "learning_rate": 8.27476672510762e-05, "loss": 0.6014, "step": 3578 }, { "epoch": 0.6910600502027419, "grad_norm": 1.2112808227539062, "learning_rate": 8.273493204488699e-05, "loss": 0.5792, "step": 3579 }, { "epoch": 0.6912531376713651, "grad_norm": 1.7431280612945557, "learning_rate": 8.272219312083815e-05, "loss": 0.6781, "step": 3580 }, { "epoch": 0.6914462251399884, "grad_norm": 1.2220906019210815, "learning_rate": 8.270945048037652e-05, "loss": 0.6347, "step": 3581 }, { "epoch": 0.6916393126086117, "grad_norm": 2.1606199741363525, "learning_rate": 8.269670412494933e-05, "loss": 0.7275, "step": 3582 }, { "epoch": 0.691832400077235, "grad_norm": 1.3427271842956543, "learning_rate": 8.268395405600424e-05, "loss": 0.6686, "step": 3583 }, { "epoch": 0.6920254875458582, "grad_norm": 1.3053447008132935, "learning_rate": 8.267120027498934e-05, "loss": 0.6399, "step": 3584 }, { "epoch": 0.6922185750144816, "grad_norm": 1.303562045097351, "learning_rate": 8.265844278335314e-05, "loss": 0.6555, "step": 3585 }, { "epoch": 0.6924116624831048, "grad_norm": 1.6635102033615112, "learning_rate": 8.264568158254453e-05, "loss": 0.6111, "step": 3586 }, { "epoch": 0.6926047499517282, "grad_norm": 0.801847517490387, "learning_rate": 8.263291667401292e-05, "loss": 0.6329, "step": 3587 }, { "epoch": 0.6927978374203514, "grad_norm": 1.7828298807144165, "learning_rate": 8.262014805920803e-05, "loss": 0.6808, "step": 3588 }, { "epoch": 0.6929909248889747, "grad_norm": 1.0513895750045776, "learning_rate": 8.26073757395801e-05, "loss": 0.6542, "step": 3589 }, { "epoch": 0.693184012357598, "grad_norm": 1.29887855052948, "learning_rate": 8.25945997165797e-05, "loss": 0.6761, "step": 3590 }, { "epoch": 0.6933770998262213, "grad_norm": 0.8328737616539001, "learning_rate": 8.258181999165788e-05, "loss": 0.6332, "step": 3591 }, { "epoch": 0.6935701872948445, "grad_norm": 1.3264083862304688, "learning_rate": 8.256903656626609e-05, "loss": 0.6599, "step": 3592 }, { "epoch": 0.6937632747634679, "grad_norm": 1.0370433330535889, "learning_rate": 8.255624944185621e-05, "loss": 0.6435, "step": 3593 }, { "epoch": 0.6939563622320911, "grad_norm": 0.9347022771835327, "learning_rate": 8.254345861988052e-05, "loss": 0.6334, "step": 3594 }, { "epoch": 0.6941494497007145, "grad_norm": 0.7953053712844849, "learning_rate": 8.253066410179174e-05, "loss": 0.6235, "step": 3595 }, { "epoch": 0.6943425371693377, "grad_norm": 1.3197113275527954, "learning_rate": 8.251786588904301e-05, "loss": 0.6822, "step": 3596 }, { "epoch": 0.694535624637961, "grad_norm": 1.5641237497329712, "learning_rate": 8.250506398308788e-05, "loss": 0.633, "step": 3597 }, { "epoch": 0.6947287121065843, "grad_norm": 1.3815802335739136, "learning_rate": 8.249225838538032e-05, "loss": 0.634, "step": 3598 }, { "epoch": 0.6949217995752076, "grad_norm": 1.0562851428985596, "learning_rate": 8.247944909737473e-05, "loss": 0.6854, "step": 3599 }, { "epoch": 0.6951148870438308, "grad_norm": 1.6876578330993652, "learning_rate": 8.24666361205259e-05, "loss": 0.6578, "step": 3600 }, { "epoch": 0.6953079745124542, "grad_norm": 1.390223741531372, "learning_rate": 8.245381945628908e-05, "loss": 0.673, "step": 3601 }, { "epoch": 0.6955010619810774, "grad_norm": 1.2369732856750488, "learning_rate": 8.244099910611992e-05, "loss": 0.542, "step": 3602 }, { "epoch": 0.6956941494497008, "grad_norm": 1.1079541444778442, "learning_rate": 8.242817507147447e-05, "loss": 0.625, "step": 3603 }, { "epoch": 0.695887236918324, "grad_norm": 1.2021151781082153, "learning_rate": 8.241534735380921e-05, "loss": 0.6095, "step": 3604 }, { "epoch": 0.6960803243869473, "grad_norm": 1.0192242860794067, "learning_rate": 8.240251595458108e-05, "loss": 0.6513, "step": 3605 }, { "epoch": 0.6962734118555706, "grad_norm": 2.0000386238098145, "learning_rate": 8.238968087524736e-05, "loss": 0.6054, "step": 3606 }, { "epoch": 0.6964664993241939, "grad_norm": 2.2251265048980713, "learning_rate": 8.237684211726584e-05, "loss": 0.6687, "step": 3607 }, { "epoch": 0.6966595867928171, "grad_norm": 1.9096570014953613, "learning_rate": 8.236399968209463e-05, "loss": 0.6197, "step": 3608 }, { "epoch": 0.6968526742614405, "grad_norm": 1.4119703769683838, "learning_rate": 8.235115357119233e-05, "loss": 0.6166, "step": 3609 }, { "epoch": 0.6970457617300637, "grad_norm": 2.760033130645752, "learning_rate": 8.233830378601792e-05, "loss": 0.5936, "step": 3610 }, { "epoch": 0.6972388491986871, "grad_norm": 1.1520072221755981, "learning_rate": 8.232545032803083e-05, "loss": 0.6683, "step": 3611 }, { "epoch": 0.6974319366673103, "grad_norm": 1.3085246086120605, "learning_rate": 8.231259319869087e-05, "loss": 0.6642, "step": 3612 }, { "epoch": 0.6976250241359335, "grad_norm": 3.234168529510498, "learning_rate": 8.229973239945828e-05, "loss": 0.6178, "step": 3613 }, { "epoch": 0.6978181116045569, "grad_norm": 2.2673745155334473, "learning_rate": 8.228686793179374e-05, "loss": 0.7337, "step": 3614 }, { "epoch": 0.6980111990731801, "grad_norm": 0.7504491806030273, "learning_rate": 8.227399979715833e-05, "loss": 0.6515, "step": 3615 }, { "epoch": 0.6982042865418034, "grad_norm": 1.1153454780578613, "learning_rate": 8.22611279970135e-05, "loss": 0.5932, "step": 3616 }, { "epoch": 0.6983973740104267, "grad_norm": 0.8499342203140259, "learning_rate": 8.224825253282122e-05, "loss": 0.6256, "step": 3617 }, { "epoch": 0.69859046147905, "grad_norm": 0.8951535224914551, "learning_rate": 8.22353734060438e-05, "loss": 0.6458, "step": 3618 }, { "epoch": 0.6987835489476732, "grad_norm": 0.7294445633888245, "learning_rate": 8.222249061814396e-05, "loss": 0.5976, "step": 3619 }, { "epoch": 0.6989766364162966, "grad_norm": 0.8587629199028015, "learning_rate": 8.220960417058487e-05, "loss": 0.6583, "step": 3620 }, { "epoch": 0.6991697238849198, "grad_norm": 0.7145341038703918, "learning_rate": 8.219671406483009e-05, "loss": 0.619, "step": 3621 }, { "epoch": 0.6993628113535432, "grad_norm": 1.1057512760162354, "learning_rate": 8.218382030234365e-05, "loss": 0.5982, "step": 3622 }, { "epoch": 0.6995558988221664, "grad_norm": 1.4355970621109009, "learning_rate": 8.217092288458992e-05, "loss": 0.6959, "step": 3623 }, { "epoch": 0.6997489862907897, "grad_norm": 1.177641749382019, "learning_rate": 8.215802181303374e-05, "loss": 0.6185, "step": 3624 }, { "epoch": 0.699942073759413, "grad_norm": 0.8548060655593872, "learning_rate": 8.214511708914032e-05, "loss": 0.6647, "step": 3625 }, { "epoch": 0.7001351612280363, "grad_norm": 1.0605223178863525, "learning_rate": 8.213220871437535e-05, "loss": 0.6902, "step": 3626 }, { "epoch": 0.7003282486966595, "grad_norm": 0.9401306509971619, "learning_rate": 8.211929669020485e-05, "loss": 0.7011, "step": 3627 }, { "epoch": 0.7005213361652829, "grad_norm": 1.2671808004379272, "learning_rate": 8.210638101809532e-05, "loss": 0.6323, "step": 3628 }, { "epoch": 0.7007144236339061, "grad_norm": 0.9317677021026611, "learning_rate": 8.209346169951366e-05, "loss": 0.6, "step": 3629 }, { "epoch": 0.7009075111025295, "grad_norm": 0.8510116934776306, "learning_rate": 8.208053873592718e-05, "loss": 0.6087, "step": 3630 }, { "epoch": 0.7011005985711527, "grad_norm": 0.6809743046760559, "learning_rate": 8.206761212880359e-05, "loss": 0.679, "step": 3631 }, { "epoch": 0.701293686039776, "grad_norm": 0.59401535987854, "learning_rate": 8.2054681879611e-05, "loss": 0.5945, "step": 3632 }, { "epoch": 0.7014867735083993, "grad_norm": 3.313981533050537, "learning_rate": 8.204174798981802e-05, "loss": 0.609, "step": 3633 }, { "epoch": 0.7016798609770226, "grad_norm": 0.9836559295654297, "learning_rate": 8.202881046089357e-05, "loss": 0.6391, "step": 3634 }, { "epoch": 0.7018729484456459, "grad_norm": 1.2662522792816162, "learning_rate": 8.201586929430702e-05, "loss": 0.6772, "step": 3635 }, { "epoch": 0.7020660359142692, "grad_norm": 0.6979679465293884, "learning_rate": 8.20029244915282e-05, "loss": 0.6054, "step": 3636 }, { "epoch": 0.7022591233828924, "grad_norm": 0.7456821799278259, "learning_rate": 8.198997605402725e-05, "loss": 0.6864, "step": 3637 }, { "epoch": 0.7024522108515158, "grad_norm": 0.5557667016983032, "learning_rate": 8.197702398327486e-05, "loss": 0.6289, "step": 3638 }, { "epoch": 0.702645298320139, "grad_norm": 0.9507164359092712, "learning_rate": 8.196406828074199e-05, "loss": 0.6407, "step": 3639 }, { "epoch": 0.7028383857887623, "grad_norm": 1.6758145093917847, "learning_rate": 8.195110894790013e-05, "loss": 0.6469, "step": 3640 }, { "epoch": 0.7030314732573856, "grad_norm": 0.8891608715057373, "learning_rate": 8.193814598622109e-05, "loss": 0.6811, "step": 3641 }, { "epoch": 0.7032245607260089, "grad_norm": 0.5641816258430481, "learning_rate": 8.192517939717717e-05, "loss": 0.6531, "step": 3642 }, { "epoch": 0.7034176481946322, "grad_norm": 0.6428512334823608, "learning_rate": 8.191220918224101e-05, "loss": 0.6035, "step": 3643 }, { "epoch": 0.7036107356632555, "grad_norm": 0.7663029432296753, "learning_rate": 8.189923534288573e-05, "loss": 0.6133, "step": 3644 }, { "epoch": 0.7038038231318787, "grad_norm": 1.703533411026001, "learning_rate": 8.188625788058481e-05, "loss": 0.582, "step": 3645 }, { "epoch": 0.7039969106005021, "grad_norm": 1.3059054613113403, "learning_rate": 8.187327679681218e-05, "loss": 0.5985, "step": 3646 }, { "epoch": 0.7041899980691253, "grad_norm": 0.7264933586120605, "learning_rate": 8.186029209304216e-05, "loss": 0.6304, "step": 3647 }, { "epoch": 0.7043830855377486, "grad_norm": 1.8144532442092896, "learning_rate": 8.184730377074944e-05, "loss": 0.6461, "step": 3648 }, { "epoch": 0.7045761730063719, "grad_norm": 1.173240065574646, "learning_rate": 8.183431183140925e-05, "loss": 0.6674, "step": 3649 }, { "epoch": 0.7047692604749952, "grad_norm": 1.016034483909607, "learning_rate": 8.182131627649706e-05, "loss": 0.5983, "step": 3650 }, { "epoch": 0.7049623479436185, "grad_norm": 0.5242447853088379, "learning_rate": 8.180831710748888e-05, "loss": 0.7014, "step": 3651 }, { "epoch": 0.7051554354122418, "grad_norm": 1.3622809648513794, "learning_rate": 8.17953143258611e-05, "loss": 0.6363, "step": 3652 }, { "epoch": 0.705348522880865, "grad_norm": 0.7636396288871765, "learning_rate": 8.178230793309045e-05, "loss": 0.6298, "step": 3653 }, { "epoch": 0.7055416103494884, "grad_norm": 0.7166277170181274, "learning_rate": 8.176929793065418e-05, "loss": 0.6486, "step": 3654 }, { "epoch": 0.7057346978181116, "grad_norm": 1.5766873359680176, "learning_rate": 8.175628432002989e-05, "loss": 0.6602, "step": 3655 }, { "epoch": 0.7059277852867349, "grad_norm": 0.7854254245758057, "learning_rate": 8.174326710269556e-05, "loss": 0.6347, "step": 3656 }, { "epoch": 0.7061208727553582, "grad_norm": 0.8403565883636475, "learning_rate": 8.173024628012965e-05, "loss": 0.6233, "step": 3657 }, { "epoch": 0.7063139602239815, "grad_norm": 0.9929594397544861, "learning_rate": 8.1717221853811e-05, "loss": 0.6323, "step": 3658 }, { "epoch": 0.7065070476926048, "grad_norm": 2.009462594985962, "learning_rate": 8.170419382521883e-05, "loss": 0.6868, "step": 3659 }, { "epoch": 0.706700135161228, "grad_norm": 0.7646650671958923, "learning_rate": 8.169116219583282e-05, "loss": 0.7174, "step": 3660 }, { "epoch": 0.7068932226298513, "grad_norm": 0.8152843117713928, "learning_rate": 8.167812696713301e-05, "loss": 0.652, "step": 3661 }, { "epoch": 0.7070863100984746, "grad_norm": 1.8145467042922974, "learning_rate": 8.166508814059988e-05, "loss": 0.613, "step": 3662 }, { "epoch": 0.7072793975670979, "grad_norm": 0.6601443290710449, "learning_rate": 8.165204571771432e-05, "loss": 0.6986, "step": 3663 }, { "epoch": 0.7074724850357211, "grad_norm": 0.729587197303772, "learning_rate": 8.16389996999576e-05, "loss": 0.6893, "step": 3664 }, { "epoch": 0.7076655725043445, "grad_norm": 0.7408035397529602, "learning_rate": 8.162595008881145e-05, "loss": 0.6085, "step": 3665 }, { "epoch": 0.7078586599729677, "grad_norm": 0.7705402374267578, "learning_rate": 8.161289688575795e-05, "loss": 0.7283, "step": 3666 }, { "epoch": 0.708051747441591, "grad_norm": 1.141608715057373, "learning_rate": 8.159984009227961e-05, "loss": 0.6734, "step": 3667 }, { "epoch": 0.7082448349102143, "grad_norm": 1.2706577777862549, "learning_rate": 8.158677970985937e-05, "loss": 0.6716, "step": 3668 }, { "epoch": 0.7084379223788376, "grad_norm": 0.6251603364944458, "learning_rate": 8.157371573998053e-05, "loss": 0.584, "step": 3669 }, { "epoch": 0.7086310098474609, "grad_norm": 0.6623721718788147, "learning_rate": 8.156064818412688e-05, "loss": 0.6338, "step": 3670 }, { "epoch": 0.7088240973160842, "grad_norm": 0.8792927265167236, "learning_rate": 8.154757704378249e-05, "loss": 0.6859, "step": 3671 }, { "epoch": 0.7090171847847074, "grad_norm": 0.7810660600662231, "learning_rate": 8.153450232043198e-05, "loss": 0.6735, "step": 3672 }, { "epoch": 0.7092102722533308, "grad_norm": 0.8006030321121216, "learning_rate": 8.152142401556025e-05, "loss": 0.6047, "step": 3673 }, { "epoch": 0.709403359721954, "grad_norm": 1.4649639129638672, "learning_rate": 8.15083421306527e-05, "loss": 0.5965, "step": 3674 }, { "epoch": 0.7095964471905774, "grad_norm": 1.0125727653503418, "learning_rate": 8.149525666719511e-05, "loss": 0.5974, "step": 3675 }, { "epoch": 0.7097895346592006, "grad_norm": 1.1458032131195068, "learning_rate": 8.148216762667361e-05, "loss": 0.6364, "step": 3676 }, { "epoch": 0.7099826221278239, "grad_norm": 0.5942208170890808, "learning_rate": 8.146907501057483e-05, "loss": 0.6869, "step": 3677 }, { "epoch": 0.7101757095964472, "grad_norm": 0.744009256362915, "learning_rate": 8.145597882038573e-05, "loss": 0.6417, "step": 3678 }, { "epoch": 0.7103687970650705, "grad_norm": 1.5698515176773071, "learning_rate": 8.144287905759374e-05, "loss": 0.677, "step": 3679 }, { "epoch": 0.7105618845336937, "grad_norm": 1.2633293867111206, "learning_rate": 8.142977572368662e-05, "loss": 0.698, "step": 3680 }, { "epoch": 0.7107549720023171, "grad_norm": 0.9918978214263916, "learning_rate": 8.141666882015262e-05, "loss": 0.609, "step": 3681 }, { "epoch": 0.7109480594709403, "grad_norm": 1.594928503036499, "learning_rate": 8.14035583484803e-05, "loss": 0.6821, "step": 3682 }, { "epoch": 0.7111411469395637, "grad_norm": 0.9223233461380005, "learning_rate": 8.139044431015872e-05, "loss": 0.6216, "step": 3683 }, { "epoch": 0.7113342344081869, "grad_norm": 0.9881449937820435, "learning_rate": 8.13773267066773e-05, "loss": 0.6096, "step": 3684 }, { "epoch": 0.7115273218768102, "grad_norm": 1.6547141075134277, "learning_rate": 8.136420553952584e-05, "loss": 0.6371, "step": 3685 }, { "epoch": 0.7117204093454335, "grad_norm": 1.133339762687683, "learning_rate": 8.135108081019461e-05, "loss": 0.6987, "step": 3686 }, { "epoch": 0.7119134968140568, "grad_norm": 0.6426076292991638, "learning_rate": 8.133795252017422e-05, "loss": 0.6343, "step": 3687 }, { "epoch": 0.71210658428268, "grad_norm": 1.6270644664764404, "learning_rate": 8.132482067095571e-05, "loss": 0.6166, "step": 3688 }, { "epoch": 0.7122996717513034, "grad_norm": 1.8878200054168701, "learning_rate": 8.131168526403055e-05, "loss": 0.6724, "step": 3689 }, { "epoch": 0.7124927592199266, "grad_norm": 0.6987967491149902, "learning_rate": 8.129854630089056e-05, "loss": 0.5939, "step": 3690 }, { "epoch": 0.71268584668855, "grad_norm": 1.0206820964813232, "learning_rate": 8.1285403783028e-05, "loss": 0.6935, "step": 3691 }, { "epoch": 0.7128789341571732, "grad_norm": 1.0274021625518799, "learning_rate": 8.127225771193554e-05, "loss": 0.6725, "step": 3692 }, { "epoch": 0.7130720216257965, "grad_norm": 2.8575944900512695, "learning_rate": 8.125910808910624e-05, "loss": 0.6586, "step": 3693 }, { "epoch": 0.7132651090944198, "grad_norm": 0.9247882962226868, "learning_rate": 8.124595491603357e-05, "loss": 0.6257, "step": 3694 }, { "epoch": 0.7134581965630431, "grad_norm": 0.8688456416130066, "learning_rate": 8.123279819421138e-05, "loss": 0.6702, "step": 3695 }, { "epoch": 0.7136512840316663, "grad_norm": 1.4237827062606812, "learning_rate": 8.121963792513394e-05, "loss": 0.6107, "step": 3696 }, { "epoch": 0.7138443715002897, "grad_norm": 0.7016758322715759, "learning_rate": 8.120647411029594e-05, "loss": 0.6645, "step": 3697 }, { "epoch": 0.7140374589689129, "grad_norm": 1.692467212677002, "learning_rate": 8.119330675119245e-05, "loss": 0.6706, "step": 3698 }, { "epoch": 0.7142305464375363, "grad_norm": 1.271574854850769, "learning_rate": 8.118013584931894e-05, "loss": 0.6877, "step": 3699 }, { "epoch": 0.7144236339061595, "grad_norm": 0.8566923141479492, "learning_rate": 8.116696140617128e-05, "loss": 0.6283, "step": 3700 }, { "epoch": 0.7146167213747828, "grad_norm": 1.1244771480560303, "learning_rate": 8.11537834232458e-05, "loss": 0.6484, "step": 3701 }, { "epoch": 0.7148098088434061, "grad_norm": 0.8505006432533264, "learning_rate": 8.114060190203914e-05, "loss": 0.6208, "step": 3702 }, { "epoch": 0.7150028963120294, "grad_norm": 0.882756233215332, "learning_rate": 8.112741684404841e-05, "loss": 0.6285, "step": 3703 }, { "epoch": 0.7151959837806526, "grad_norm": 1.0819116830825806, "learning_rate": 8.111422825077109e-05, "loss": 0.5998, "step": 3704 }, { "epoch": 0.715389071249276, "grad_norm": 2.0307776927948, "learning_rate": 8.110103612370508e-05, "loss": 0.5953, "step": 3705 }, { "epoch": 0.7155821587178992, "grad_norm": 0.8781945705413818, "learning_rate": 8.108784046434865e-05, "loss": 0.6275, "step": 3706 }, { "epoch": 0.7157752461865224, "grad_norm": 1.065858006477356, "learning_rate": 8.107464127420052e-05, "loss": 0.6249, "step": 3707 }, { "epoch": 0.7159683336551458, "grad_norm": 1.279893159866333, "learning_rate": 8.106143855475976e-05, "loss": 0.7342, "step": 3708 }, { "epoch": 0.716161421123769, "grad_norm": 0.7979785203933716, "learning_rate": 8.104823230752588e-05, "loss": 0.7088, "step": 3709 }, { "epoch": 0.7163545085923924, "grad_norm": 0.7981135249137878, "learning_rate": 8.103502253399875e-05, "loss": 0.635, "step": 3710 }, { "epoch": 0.7165475960610156, "grad_norm": 2.056454658508301, "learning_rate": 8.10218092356787e-05, "loss": 0.672, "step": 3711 }, { "epoch": 0.7167406835296389, "grad_norm": 0.9283109307289124, "learning_rate": 8.100859241406642e-05, "loss": 0.6928, "step": 3712 }, { "epoch": 0.7169337709982622, "grad_norm": 0.8712818026542664, "learning_rate": 8.0995372070663e-05, "loss": 0.6198, "step": 3713 }, { "epoch": 0.7171268584668855, "grad_norm": 0.8034607768058777, "learning_rate": 8.098214820696994e-05, "loss": 0.7021, "step": 3714 }, { "epoch": 0.7173199459355087, "grad_norm": 0.8198413252830505, "learning_rate": 8.096892082448913e-05, "loss": 0.6207, "step": 3715 }, { "epoch": 0.7175130334041321, "grad_norm": 1.1698853969573975, "learning_rate": 8.095568992472285e-05, "loss": 0.7063, "step": 3716 }, { "epoch": 0.7177061208727553, "grad_norm": 1.3235750198364258, "learning_rate": 8.094245550917382e-05, "loss": 0.627, "step": 3717 }, { "epoch": 0.7178992083413787, "grad_norm": 0.6581116318702698, "learning_rate": 8.092921757934512e-05, "loss": 0.6887, "step": 3718 }, { "epoch": 0.7180922958100019, "grad_norm": 1.547919750213623, "learning_rate": 8.091597613674027e-05, "loss": 0.6646, "step": 3719 }, { "epoch": 0.7182853832786252, "grad_norm": 0.8936721086502075, "learning_rate": 8.090273118286314e-05, "loss": 0.7239, "step": 3720 }, { "epoch": 0.7184784707472485, "grad_norm": 0.6469042301177979, "learning_rate": 8.088948271921801e-05, "loss": 0.6091, "step": 3721 }, { "epoch": 0.7186715582158718, "grad_norm": 0.8323136568069458, "learning_rate": 8.08762307473096e-05, "loss": 0.6446, "step": 3722 }, { "epoch": 0.718864645684495, "grad_norm": 0.551497220993042, "learning_rate": 8.0862975268643e-05, "loss": 0.6377, "step": 3723 }, { "epoch": 0.7190577331531184, "grad_norm": 0.5949428081512451, "learning_rate": 8.084971628472364e-05, "loss": 0.6973, "step": 3724 }, { "epoch": 0.7192508206217416, "grad_norm": 0.6051768064498901, "learning_rate": 8.083645379705746e-05, "loss": 0.6939, "step": 3725 }, { "epoch": 0.719443908090365, "grad_norm": 0.8602690696716309, "learning_rate": 8.082318780715074e-05, "loss": 0.6679, "step": 3726 }, { "epoch": 0.7196369955589882, "grad_norm": 1.0004087686538696, "learning_rate": 8.080991831651015e-05, "loss": 0.6706, "step": 3727 }, { "epoch": 0.7198300830276115, "grad_norm": 3.2812235355377197, "learning_rate": 8.079664532664275e-05, "loss": 0.6564, "step": 3728 }, { "epoch": 0.7200231704962348, "grad_norm": 0.5894715785980225, "learning_rate": 8.078336883905603e-05, "loss": 0.6525, "step": 3729 }, { "epoch": 0.7202162579648581, "grad_norm": 0.6174034476280212, "learning_rate": 8.07700888552579e-05, "loss": 0.6286, "step": 3730 }, { "epoch": 0.7204093454334813, "grad_norm": 2.503736734390259, "learning_rate": 8.075680537675656e-05, "loss": 0.6354, "step": 3731 }, { "epoch": 0.7206024329021047, "grad_norm": 0.6177982091903687, "learning_rate": 8.074351840506074e-05, "loss": 0.6664, "step": 3732 }, { "epoch": 0.7207955203707279, "grad_norm": 0.6098302602767944, "learning_rate": 8.073022794167944e-05, "loss": 0.6726, "step": 3733 }, { "epoch": 0.7209886078393513, "grad_norm": 0.8732232451438904, "learning_rate": 8.071693398812219e-05, "loss": 0.682, "step": 3734 }, { "epoch": 0.7211816953079745, "grad_norm": 0.5821350812911987, "learning_rate": 8.070363654589881e-05, "loss": 0.6618, "step": 3735 }, { "epoch": 0.7213747827765978, "grad_norm": 0.609035074710846, "learning_rate": 8.069033561651954e-05, "loss": 0.6078, "step": 3736 }, { "epoch": 0.7215678702452211, "grad_norm": 0.7623394131660461, "learning_rate": 8.067703120149506e-05, "loss": 0.6193, "step": 3737 }, { "epoch": 0.7217609577138444, "grad_norm": 0.5630690455436707, "learning_rate": 8.066372330233638e-05, "loss": 0.6028, "step": 3738 }, { "epoch": 0.7219540451824676, "grad_norm": 0.6626553535461426, "learning_rate": 8.065041192055498e-05, "loss": 0.7187, "step": 3739 }, { "epoch": 0.722147132651091, "grad_norm": 0.6730861663818359, "learning_rate": 8.063709705766267e-05, "loss": 0.6284, "step": 3740 }, { "epoch": 0.7223402201197142, "grad_norm": 0.7812541723251343, "learning_rate": 8.062377871517168e-05, "loss": 0.7231, "step": 3741 }, { "epoch": 0.7225333075883376, "grad_norm": 4.691396713256836, "learning_rate": 8.061045689459466e-05, "loss": 0.6278, "step": 3742 }, { "epoch": 0.7227263950569608, "grad_norm": 0.6468226909637451, "learning_rate": 8.05971315974446e-05, "loss": 0.6787, "step": 3743 }, { "epoch": 0.7229194825255841, "grad_norm": 0.4817442297935486, "learning_rate": 8.058380282523495e-05, "loss": 0.6431, "step": 3744 }, { "epoch": 0.7231125699942074, "grad_norm": 0.7628255486488342, "learning_rate": 8.057047057947949e-05, "loss": 0.5986, "step": 3745 }, { "epoch": 0.7233056574628307, "grad_norm": 0.5262361764907837, "learning_rate": 8.055713486169243e-05, "loss": 0.5681, "step": 3746 }, { "epoch": 0.7234987449314539, "grad_norm": 0.6250230073928833, "learning_rate": 8.054379567338839e-05, "loss": 0.6359, "step": 3747 }, { "epoch": 0.7236918324000773, "grad_norm": 0.8467037081718445, "learning_rate": 8.053045301608235e-05, "loss": 0.6482, "step": 3748 }, { "epoch": 0.7238849198687005, "grad_norm": 0.8675990700721741, "learning_rate": 8.051710689128971e-05, "loss": 0.6916, "step": 3749 }, { "epoch": 0.7240780073373239, "grad_norm": 0.4934847354888916, "learning_rate": 8.050375730052621e-05, "loss": 0.6568, "step": 3750 }, { "epoch": 0.7242710948059471, "grad_norm": 0.5857383012771606, "learning_rate": 8.04904042453081e-05, "loss": 0.7045, "step": 3751 }, { "epoch": 0.7244641822745704, "grad_norm": 0.4820059537887573, "learning_rate": 8.04770477271519e-05, "loss": 0.6244, "step": 3752 }, { "epoch": 0.7246572697431937, "grad_norm": 0.5229163765907288, "learning_rate": 8.046368774757455e-05, "loss": 0.6684, "step": 3753 }, { "epoch": 0.7248503572118169, "grad_norm": 0.6539388298988342, "learning_rate": 8.045032430809346e-05, "loss": 0.6427, "step": 3754 }, { "epoch": 0.7250434446804402, "grad_norm": 7.748015403747559, "learning_rate": 8.043695741022636e-05, "loss": 0.6655, "step": 3755 }, { "epoch": 0.7252365321490635, "grad_norm": 0.5709238052368164, "learning_rate": 8.042358705549136e-05, "loss": 0.6639, "step": 3756 }, { "epoch": 0.7254296196176868, "grad_norm": 0.5804509520530701, "learning_rate": 8.041021324540703e-05, "loss": 0.6388, "step": 3757 }, { "epoch": 0.72562270708631, "grad_norm": 0.6965672373771667, "learning_rate": 8.03968359814923e-05, "loss": 0.6197, "step": 3758 }, { "epoch": 0.7258157945549334, "grad_norm": 0.8712834119796753, "learning_rate": 8.038345526526646e-05, "loss": 0.6645, "step": 3759 }, { "epoch": 0.7260088820235566, "grad_norm": 0.7162882685661316, "learning_rate": 8.037007109824923e-05, "loss": 0.6506, "step": 3760 }, { "epoch": 0.72620196949218, "grad_norm": 0.7695903182029724, "learning_rate": 8.035668348196074e-05, "loss": 0.5687, "step": 3761 }, { "epoch": 0.7263950569608032, "grad_norm": 0.5959944725036621, "learning_rate": 8.034329241792145e-05, "loss": 0.672, "step": 3762 }, { "epoch": 0.7265881444294265, "grad_norm": 1.0244019031524658, "learning_rate": 8.032989790765224e-05, "loss": 0.6037, "step": 3763 }, { "epoch": 0.7267812318980498, "grad_norm": 0.6595860123634338, "learning_rate": 8.031649995267442e-05, "loss": 0.6473, "step": 3764 }, { "epoch": 0.7269743193666731, "grad_norm": 0.8493975400924683, "learning_rate": 8.030309855450964e-05, "loss": 0.5801, "step": 3765 }, { "epoch": 0.7271674068352963, "grad_norm": 0.8878989219665527, "learning_rate": 8.028969371467996e-05, "loss": 0.7414, "step": 3766 }, { "epoch": 0.7273604943039197, "grad_norm": 0.7132131457328796, "learning_rate": 8.027628543470783e-05, "loss": 0.6518, "step": 3767 }, { "epoch": 0.7275535817725429, "grad_norm": 0.5710144639015198, "learning_rate": 8.02628737161161e-05, "loss": 0.6686, "step": 3768 }, { "epoch": 0.7277466692411663, "grad_norm": 1.1573184728622437, "learning_rate": 8.0249458560428e-05, "loss": 0.7028, "step": 3769 }, { "epoch": 0.7279397567097895, "grad_norm": 4.661871910095215, "learning_rate": 8.023603996916713e-05, "loss": 0.6866, "step": 3770 }, { "epoch": 0.7281328441784128, "grad_norm": 3.797877073287964, "learning_rate": 8.022261794385753e-05, "loss": 0.6533, "step": 3771 }, { "epoch": 0.7283259316470361, "grad_norm": 0.8224618434906006, "learning_rate": 8.020919248602361e-05, "loss": 0.6583, "step": 3772 }, { "epoch": 0.7285190191156594, "grad_norm": 1.0122244358062744, "learning_rate": 8.019576359719012e-05, "loss": 0.5744, "step": 3773 }, { "epoch": 0.7287121065842826, "grad_norm": 0.718654453754425, "learning_rate": 8.018233127888228e-05, "loss": 0.6449, "step": 3774 }, { "epoch": 0.728905194052906, "grad_norm": 0.6588109731674194, "learning_rate": 8.016889553262564e-05, "loss": 0.7157, "step": 3775 }, { "epoch": 0.7290982815215292, "grad_norm": 10.815556526184082, "learning_rate": 8.015545635994616e-05, "loss": 0.6551, "step": 3776 }, { "epoch": 0.7292913689901526, "grad_norm": 0.8438568711280823, "learning_rate": 8.014201376237022e-05, "loss": 0.6598, "step": 3777 }, { "epoch": 0.7294844564587758, "grad_norm": 1.2712574005126953, "learning_rate": 8.012856774142454e-05, "loss": 0.652, "step": 3778 }, { "epoch": 0.7296775439273991, "grad_norm": 0.7168800234794617, "learning_rate": 8.011511829863623e-05, "loss": 0.7233, "step": 3779 }, { "epoch": 0.7298706313960224, "grad_norm": 0.6449830532073975, "learning_rate": 8.010166543553282e-05, "loss": 0.6736, "step": 3780 }, { "epoch": 0.7300637188646457, "grad_norm": 0.8075013756752014, "learning_rate": 8.008820915364222e-05, "loss": 0.7209, "step": 3781 }, { "epoch": 0.730256806333269, "grad_norm": 0.9441876411437988, "learning_rate": 8.007474945449272e-05, "loss": 0.6975, "step": 3782 }, { "epoch": 0.7304498938018923, "grad_norm": 0.8140929341316223, "learning_rate": 8.006128633961298e-05, "loss": 0.6684, "step": 3783 }, { "epoch": 0.7306429812705155, "grad_norm": 0.5797718167304993, "learning_rate": 8.004781981053212e-05, "loss": 0.615, "step": 3784 }, { "epoch": 0.7308360687391389, "grad_norm": 0.6677833199501038, "learning_rate": 8.003434986877955e-05, "loss": 0.5824, "step": 3785 }, { "epoch": 0.7310291562077621, "grad_norm": 0.7715868353843689, "learning_rate": 8.002087651588515e-05, "loss": 0.5944, "step": 3786 }, { "epoch": 0.7312222436763854, "grad_norm": 0.7778599858283997, "learning_rate": 8.00073997533791e-05, "loss": 0.6498, "step": 3787 }, { "epoch": 0.7314153311450087, "grad_norm": 0.7710996270179749, "learning_rate": 7.999391958279207e-05, "loss": 0.635, "step": 3788 }, { "epoch": 0.731608418613632, "grad_norm": 0.5951109528541565, "learning_rate": 7.998043600565503e-05, "loss": 0.6549, "step": 3789 }, { "epoch": 0.7318015060822552, "grad_norm": 0.6733291745185852, "learning_rate": 7.996694902349942e-05, "loss": 0.676, "step": 3790 }, { "epoch": 0.7319945935508786, "grad_norm": 1.2334014177322388, "learning_rate": 7.995345863785695e-05, "loss": 0.71, "step": 3791 }, { "epoch": 0.7321876810195018, "grad_norm": 1.1131154298782349, "learning_rate": 7.993996485025986e-05, "loss": 0.6418, "step": 3792 }, { "epoch": 0.7323807684881252, "grad_norm": 0.7451695799827576, "learning_rate": 7.992646766224066e-05, "loss": 0.612, "step": 3793 }, { "epoch": 0.7325738559567484, "grad_norm": 0.5600263476371765, "learning_rate": 7.99129670753323e-05, "loss": 0.6555, "step": 3794 }, { "epoch": 0.7327669434253717, "grad_norm": 0.6325737833976746, "learning_rate": 7.989946309106809e-05, "loss": 0.6241, "step": 3795 }, { "epoch": 0.732960030893995, "grad_norm": 0.43522658944129944, "learning_rate": 7.988595571098176e-05, "loss": 0.6194, "step": 3796 }, { "epoch": 0.7331531183626183, "grad_norm": 0.5839243531227112, "learning_rate": 7.98724449366074e-05, "loss": 0.6453, "step": 3797 }, { "epoch": 0.7333462058312415, "grad_norm": 0.8694548606872559, "learning_rate": 7.98589307694795e-05, "loss": 0.6421, "step": 3798 }, { "epoch": 0.7335392932998649, "grad_norm": 1.5764424800872803, "learning_rate": 7.98454132111329e-05, "loss": 0.6756, "step": 3799 }, { "epoch": 0.7337323807684881, "grad_norm": 0.7369539737701416, "learning_rate": 7.983189226310288e-05, "loss": 0.6381, "step": 3800 }, { "epoch": 0.7339254682371115, "grad_norm": 1.275841236114502, "learning_rate": 7.981836792692508e-05, "loss": 0.6538, "step": 3801 }, { "epoch": 0.7341185557057347, "grad_norm": 0.8234399557113647, "learning_rate": 7.98048402041355e-05, "loss": 0.6248, "step": 3802 }, { "epoch": 0.7343116431743579, "grad_norm": 0.6299683451652527, "learning_rate": 7.979130909627055e-05, "loss": 0.6626, "step": 3803 }, { "epoch": 0.7345047306429813, "grad_norm": 0.9157936573028564, "learning_rate": 7.977777460486706e-05, "loss": 0.6308, "step": 3804 }, { "epoch": 0.7346978181116045, "grad_norm": 1.7433267831802368, "learning_rate": 7.976423673146214e-05, "loss": 0.6576, "step": 3805 }, { "epoch": 0.7348909055802278, "grad_norm": 0.7454625964164734, "learning_rate": 7.975069547759342e-05, "loss": 0.6229, "step": 3806 }, { "epoch": 0.7350839930488511, "grad_norm": 0.674950897693634, "learning_rate": 7.973715084479878e-05, "loss": 0.6864, "step": 3807 }, { "epoch": 0.7352770805174744, "grad_norm": 1.149896264076233, "learning_rate": 7.972360283461657e-05, "loss": 0.6035, "step": 3808 }, { "epoch": 0.7354701679860977, "grad_norm": 1.0668619871139526, "learning_rate": 7.971005144858553e-05, "loss": 0.6219, "step": 3809 }, { "epoch": 0.735663255454721, "grad_norm": 1.2523494958877563, "learning_rate": 7.969649668824472e-05, "loss": 0.6644, "step": 3810 }, { "epoch": 0.7358563429233442, "grad_norm": 1.3366203308105469, "learning_rate": 7.968293855513364e-05, "loss": 0.6702, "step": 3811 }, { "epoch": 0.7360494303919676, "grad_norm": 0.7895267605781555, "learning_rate": 7.966937705079213e-05, "loss": 0.67, "step": 3812 }, { "epoch": 0.7362425178605908, "grad_norm": 0.9021055102348328, "learning_rate": 7.965581217676044e-05, "loss": 0.6166, "step": 3813 }, { "epoch": 0.7364356053292141, "grad_norm": 0.5581454634666443, "learning_rate": 7.96422439345792e-05, "loss": 0.6587, "step": 3814 }, { "epoch": 0.7366286927978374, "grad_norm": 3.9990150928497314, "learning_rate": 7.962867232578943e-05, "loss": 0.7051, "step": 3815 }, { "epoch": 0.7368217802664607, "grad_norm": 0.7010816335678101, "learning_rate": 7.96150973519325e-05, "loss": 0.6804, "step": 3816 }, { "epoch": 0.737014867735084, "grad_norm": 0.659359335899353, "learning_rate": 7.960151901455018e-05, "loss": 0.6756, "step": 3817 }, { "epoch": 0.7372079552037073, "grad_norm": 1.0424747467041016, "learning_rate": 7.958793731518465e-05, "loss": 0.6325, "step": 3818 }, { "epoch": 0.7374010426723305, "grad_norm": 0.9737278819084167, "learning_rate": 7.957435225537845e-05, "loss": 0.6449, "step": 3819 }, { "epoch": 0.7375941301409539, "grad_norm": 0.7972893714904785, "learning_rate": 7.956076383667447e-05, "loss": 0.5925, "step": 3820 }, { "epoch": 0.7377872176095771, "grad_norm": 1.0566073656082153, "learning_rate": 7.954717206061604e-05, "loss": 0.6307, "step": 3821 }, { "epoch": 0.7379803050782004, "grad_norm": 2.0698273181915283, "learning_rate": 7.95335769287468e-05, "loss": 0.6494, "step": 3822 }, { "epoch": 0.7381733925468237, "grad_norm": 0.8225741386413574, "learning_rate": 7.951997844261085e-05, "loss": 0.6746, "step": 3823 }, { "epoch": 0.738366480015447, "grad_norm": 2.4311578273773193, "learning_rate": 7.950637660375264e-05, "loss": 0.6983, "step": 3824 }, { "epoch": 0.7385595674840703, "grad_norm": 0.8744920492172241, "learning_rate": 7.949277141371697e-05, "loss": 0.6198, "step": 3825 }, { "epoch": 0.7387526549526936, "grad_norm": 0.5540491938591003, "learning_rate": 7.947916287404903e-05, "loss": 0.6109, "step": 3826 }, { "epoch": 0.7389457424213168, "grad_norm": 0.854030430316925, "learning_rate": 7.946555098629444e-05, "loss": 0.6747, "step": 3827 }, { "epoch": 0.7391388298899402, "grad_norm": 0.5024725198745728, "learning_rate": 7.945193575199916e-05, "loss": 0.5708, "step": 3828 }, { "epoch": 0.7393319173585634, "grad_norm": 3.652583360671997, "learning_rate": 7.943831717270954e-05, "loss": 0.654, "step": 3829 }, { "epoch": 0.7395250048271867, "grad_norm": 0.9666416645050049, "learning_rate": 7.942469524997228e-05, "loss": 0.6182, "step": 3830 }, { "epoch": 0.73971809229581, "grad_norm": 0.7144675850868225, "learning_rate": 7.94110699853345e-05, "loss": 0.6841, "step": 3831 }, { "epoch": 0.7399111797644333, "grad_norm": 2.1362717151641846, "learning_rate": 7.93974413803437e-05, "loss": 0.6389, "step": 3832 }, { "epoch": 0.7401042672330566, "grad_norm": 1.2534568309783936, "learning_rate": 7.938380943654773e-05, "loss": 0.6379, "step": 3833 }, { "epoch": 0.7402973547016799, "grad_norm": 1.9169741868972778, "learning_rate": 7.937017415549484e-05, "loss": 0.6557, "step": 3834 }, { "epoch": 0.7404904421703031, "grad_norm": 1.818678617477417, "learning_rate": 7.935653553873363e-05, "loss": 0.6712, "step": 3835 }, { "epoch": 0.7406835296389265, "grad_norm": 0.9687771797180176, "learning_rate": 7.934289358781313e-05, "loss": 0.6642, "step": 3836 }, { "epoch": 0.7408766171075497, "grad_norm": 1.0377700328826904, "learning_rate": 7.93292483042827e-05, "loss": 0.6913, "step": 3837 }, { "epoch": 0.741069704576173, "grad_norm": 1.5461353063583374, "learning_rate": 7.931559968969213e-05, "loss": 0.6907, "step": 3838 }, { "epoch": 0.7412627920447963, "grad_norm": 0.769930899143219, "learning_rate": 7.930194774559152e-05, "loss": 0.6369, "step": 3839 }, { "epoch": 0.7414558795134196, "grad_norm": 0.9369351863861084, "learning_rate": 7.92882924735314e-05, "loss": 0.664, "step": 3840 }, { "epoch": 0.7416489669820429, "grad_norm": 0.6785246133804321, "learning_rate": 7.927463387506268e-05, "loss": 0.6249, "step": 3841 }, { "epoch": 0.7418420544506662, "grad_norm": 0.7579125165939331, "learning_rate": 7.92609719517366e-05, "loss": 0.6513, "step": 3842 }, { "epoch": 0.7420351419192894, "grad_norm": 0.849955141544342, "learning_rate": 7.924730670510483e-05, "loss": 0.6695, "step": 3843 }, { "epoch": 0.7422282293879128, "grad_norm": 1.1395390033721924, "learning_rate": 7.923363813671937e-05, "loss": 0.6419, "step": 3844 }, { "epoch": 0.742421316856536, "grad_norm": 1.720082402229309, "learning_rate": 7.921996624813268e-05, "loss": 0.6786, "step": 3845 }, { "epoch": 0.7426144043251593, "grad_norm": 0.9295749664306641, "learning_rate": 7.920629104089748e-05, "loss": 0.6599, "step": 3846 }, { "epoch": 0.7428074917937826, "grad_norm": 0.6853981614112854, "learning_rate": 7.919261251656695e-05, "loss": 0.6218, "step": 3847 }, { "epoch": 0.7430005792624059, "grad_norm": 0.8255652785301208, "learning_rate": 7.917893067669464e-05, "loss": 0.679, "step": 3848 }, { "epoch": 0.7431936667310292, "grad_norm": 0.9967667460441589, "learning_rate": 7.916524552283444e-05, "loss": 0.7237, "step": 3849 }, { "epoch": 0.7433867541996524, "grad_norm": 0.7583836317062378, "learning_rate": 7.915155705654065e-05, "loss": 0.6601, "step": 3850 }, { "epoch": 0.7435798416682757, "grad_norm": 0.7100467085838318, "learning_rate": 7.91378652793679e-05, "loss": 0.6575, "step": 3851 }, { "epoch": 0.743772929136899, "grad_norm": 0.5437554121017456, "learning_rate": 7.912417019287128e-05, "loss": 0.6644, "step": 3852 }, { "epoch": 0.7439660166055223, "grad_norm": 0.627684473991394, "learning_rate": 7.911047179860618e-05, "loss": 0.6278, "step": 3853 }, { "epoch": 0.7441591040741455, "grad_norm": 0.9108797907829285, "learning_rate": 7.909677009812838e-05, "loss": 0.674, "step": 3854 }, { "epoch": 0.7443521915427689, "grad_norm": 1.7259278297424316, "learning_rate": 7.908306509299405e-05, "loss": 0.6631, "step": 3855 }, { "epoch": 0.7445452790113921, "grad_norm": 2.563885450363159, "learning_rate": 7.906935678475976e-05, "loss": 0.6127, "step": 3856 }, { "epoch": 0.7447383664800155, "grad_norm": 1.4836832284927368, "learning_rate": 7.90556451749824e-05, "loss": 0.6329, "step": 3857 }, { "epoch": 0.7449314539486387, "grad_norm": 0.507620096206665, "learning_rate": 7.904193026521927e-05, "loss": 0.6791, "step": 3858 }, { "epoch": 0.745124541417262, "grad_norm": 0.6080226302146912, "learning_rate": 7.902821205702803e-05, "loss": 0.6608, "step": 3859 }, { "epoch": 0.7453176288858853, "grad_norm": 0.5683863162994385, "learning_rate": 7.901449055196672e-05, "loss": 0.7183, "step": 3860 }, { "epoch": 0.7455107163545086, "grad_norm": 0.9173880219459534, "learning_rate": 7.900076575159379e-05, "loss": 0.6194, "step": 3861 }, { "epoch": 0.7457038038231318, "grad_norm": 0.9334734082221985, "learning_rate": 7.898703765746797e-05, "loss": 0.6153, "step": 3862 }, { "epoch": 0.7458968912917552, "grad_norm": 0.6375714540481567, "learning_rate": 7.897330627114845e-05, "loss": 0.6574, "step": 3863 }, { "epoch": 0.7460899787603784, "grad_norm": 1.4024229049682617, "learning_rate": 7.89595715941948e-05, "loss": 0.6319, "step": 3864 }, { "epoch": 0.7462830662290018, "grad_norm": 0.7589346766471863, "learning_rate": 7.89458336281669e-05, "loss": 0.6556, "step": 3865 }, { "epoch": 0.746476153697625, "grad_norm": 0.7365443110466003, "learning_rate": 7.893209237462502e-05, "loss": 0.6805, "step": 3866 }, { "epoch": 0.7466692411662483, "grad_norm": 0.8407197594642639, "learning_rate": 7.891834783512986e-05, "loss": 0.6452, "step": 3867 }, { "epoch": 0.7468623286348716, "grad_norm": 0.5674915313720703, "learning_rate": 7.890460001124242e-05, "loss": 0.6442, "step": 3868 }, { "epoch": 0.7470554161034949, "grad_norm": 1.542549967765808, "learning_rate": 7.889084890452412e-05, "loss": 0.6287, "step": 3869 }, { "epoch": 0.7472485035721181, "grad_norm": 0.5550359487533569, "learning_rate": 7.887709451653672e-05, "loss": 0.6989, "step": 3870 }, { "epoch": 0.7474415910407415, "grad_norm": 0.5723302364349365, "learning_rate": 7.88633368488424e-05, "loss": 0.6453, "step": 3871 }, { "epoch": 0.7476346785093647, "grad_norm": 0.8581286072731018, "learning_rate": 7.884957590300367e-05, "loss": 0.6206, "step": 3872 }, { "epoch": 0.7478277659779881, "grad_norm": 0.9954748153686523, "learning_rate": 7.883581168058341e-05, "loss": 0.6923, "step": 3873 }, { "epoch": 0.7480208534466113, "grad_norm": 0.6530491709709167, "learning_rate": 7.88220441831449e-05, "loss": 0.6353, "step": 3874 }, { "epoch": 0.7482139409152346, "grad_norm": 1.1897914409637451, "learning_rate": 7.880827341225178e-05, "loss": 0.6364, "step": 3875 }, { "epoch": 0.7484070283838579, "grad_norm": 1.0330891609191895, "learning_rate": 7.879449936946805e-05, "loss": 0.6297, "step": 3876 }, { "epoch": 0.7486001158524812, "grad_norm": 0.701812744140625, "learning_rate": 7.878072205635809e-05, "loss": 0.629, "step": 3877 }, { "epoch": 0.7487932033211044, "grad_norm": 3.3116908073425293, "learning_rate": 7.87669414744867e-05, "loss": 0.6276, "step": 3878 }, { "epoch": 0.7489862907897278, "grad_norm": 0.5573810935020447, "learning_rate": 7.875315762541895e-05, "loss": 0.6762, "step": 3879 }, { "epoch": 0.749179378258351, "grad_norm": 0.699683666229248, "learning_rate": 7.873937051072035e-05, "loss": 0.6789, "step": 3880 }, { "epoch": 0.7493724657269744, "grad_norm": 0.6543854475021362, "learning_rate": 7.872558013195679e-05, "loss": 0.6864, "step": 3881 }, { "epoch": 0.7495655531955976, "grad_norm": 0.545147180557251, "learning_rate": 7.871178649069447e-05, "loss": 0.6101, "step": 3882 }, { "epoch": 0.7497586406642209, "grad_norm": 0.7002601027488708, "learning_rate": 7.869798958850003e-05, "loss": 0.6199, "step": 3883 }, { "epoch": 0.7499517281328442, "grad_norm": 0.6707696318626404, "learning_rate": 7.868418942694045e-05, "loss": 0.5805, "step": 3884 }, { "epoch": 0.7501448156014675, "grad_norm": 0.8784749507904053, "learning_rate": 7.867038600758305e-05, "loss": 0.6706, "step": 3885 }, { "epoch": 0.7503379030700907, "grad_norm": 0.8034874796867371, "learning_rate": 7.865657933199557e-05, "loss": 0.6277, "step": 3886 }, { "epoch": 0.7505309905387141, "grad_norm": 0.719213604927063, "learning_rate": 7.864276940174611e-05, "loss": 0.6562, "step": 3887 }, { "epoch": 0.7507240780073373, "grad_norm": 1.1627142429351807, "learning_rate": 7.862895621840312e-05, "loss": 0.6649, "step": 3888 }, { "epoch": 0.7509171654759607, "grad_norm": 1.029930830001831, "learning_rate": 7.861513978353539e-05, "loss": 0.6603, "step": 3889 }, { "epoch": 0.7511102529445839, "grad_norm": 0.7567494511604309, "learning_rate": 7.860132009871217e-05, "loss": 0.6738, "step": 3890 }, { "epoch": 0.7513033404132072, "grad_norm": 0.8631916046142578, "learning_rate": 7.858749716550302e-05, "loss": 0.6362, "step": 3891 }, { "epoch": 0.7514964278818305, "grad_norm": 0.6215409636497498, "learning_rate": 7.857367098547784e-05, "loss": 0.6038, "step": 3892 }, { "epoch": 0.7516895153504538, "grad_norm": 0.8582572937011719, "learning_rate": 7.855984156020696e-05, "loss": 0.6067, "step": 3893 }, { "epoch": 0.751882602819077, "grad_norm": 0.6400330662727356, "learning_rate": 7.854600889126105e-05, "loss": 0.6413, "step": 3894 }, { "epoch": 0.7520756902877004, "grad_norm": 1.504412293434143, "learning_rate": 7.853217298021115e-05, "loss": 0.5732, "step": 3895 }, { "epoch": 0.7522687777563236, "grad_norm": 1.1501412391662598, "learning_rate": 7.851833382862866e-05, "loss": 0.5796, "step": 3896 }, { "epoch": 0.7524618652249468, "grad_norm": 0.6894485354423523, "learning_rate": 7.850449143808537e-05, "loss": 0.6631, "step": 3897 }, { "epoch": 0.7526549526935702, "grad_norm": 1.4332202672958374, "learning_rate": 7.849064581015343e-05, "loss": 0.6136, "step": 3898 }, { "epoch": 0.7528480401621934, "grad_norm": 0.579323410987854, "learning_rate": 7.847679694640532e-05, "loss": 0.582, "step": 3899 }, { "epoch": 0.7530411276308168, "grad_norm": 0.501309335231781, "learning_rate": 7.846294484841397e-05, "loss": 0.6439, "step": 3900 }, { "epoch": 0.75323421509944, "grad_norm": 0.8762698769569397, "learning_rate": 7.844908951775259e-05, "loss": 0.7065, "step": 3901 }, { "epoch": 0.7534273025680633, "grad_norm": 0.5108374357223511, "learning_rate": 7.843523095599481e-05, "loss": 0.6272, "step": 3902 }, { "epoch": 0.7536203900366866, "grad_norm": 1.1086536645889282, "learning_rate": 7.842136916471461e-05, "loss": 0.6658, "step": 3903 }, { "epoch": 0.7538134775053099, "grad_norm": 0.5181733965873718, "learning_rate": 7.840750414548633e-05, "loss": 0.6372, "step": 3904 }, { "epoch": 0.7540065649739331, "grad_norm": 1.4953055381774902, "learning_rate": 7.839363589988469e-05, "loss": 0.6998, "step": 3905 }, { "epoch": 0.7541996524425565, "grad_norm": 1.0375111103057861, "learning_rate": 7.837976442948478e-05, "loss": 0.6444, "step": 3906 }, { "epoch": 0.7543927399111797, "grad_norm": 0.8608935475349426, "learning_rate": 7.836588973586204e-05, "loss": 0.6434, "step": 3907 }, { "epoch": 0.7545858273798031, "grad_norm": 0.6966664791107178, "learning_rate": 7.835201182059229e-05, "loss": 0.6444, "step": 3908 }, { "epoch": 0.7547789148484263, "grad_norm": 4.823271751403809, "learning_rate": 7.833813068525169e-05, "loss": 0.6711, "step": 3909 }, { "epoch": 0.7549720023170496, "grad_norm": 0.7463254928588867, "learning_rate": 7.832424633141682e-05, "loss": 0.6082, "step": 3910 }, { "epoch": 0.7551650897856729, "grad_norm": 0.9775930047035217, "learning_rate": 7.831035876066454e-05, "loss": 0.6464, "step": 3911 }, { "epoch": 0.7553581772542962, "grad_norm": 0.9177309274673462, "learning_rate": 7.829646797457219e-05, "loss": 0.6767, "step": 3912 }, { "epoch": 0.7555512647229194, "grad_norm": 0.6923829913139343, "learning_rate": 7.828257397471737e-05, "loss": 0.7002, "step": 3913 }, { "epoch": 0.7557443521915428, "grad_norm": 0.9614987373352051, "learning_rate": 7.82686767626781e-05, "loss": 0.6956, "step": 3914 }, { "epoch": 0.755937439660166, "grad_norm": 0.6564022302627563, "learning_rate": 7.825477634003276e-05, "loss": 0.7082, "step": 3915 }, { "epoch": 0.7561305271287894, "grad_norm": 0.6421145796775818, "learning_rate": 7.824087270836004e-05, "loss": 0.592, "step": 3916 }, { "epoch": 0.7563236145974126, "grad_norm": 5.872541427612305, "learning_rate": 7.82269658692391e-05, "loss": 0.6681, "step": 3917 }, { "epoch": 0.7565167020660359, "grad_norm": 0.8745604157447815, "learning_rate": 7.821305582424938e-05, "loss": 0.6802, "step": 3918 }, { "epoch": 0.7567097895346592, "grad_norm": 0.7164022922515869, "learning_rate": 7.81991425749707e-05, "loss": 0.6091, "step": 3919 }, { "epoch": 0.7569028770032825, "grad_norm": 1.772155523300171, "learning_rate": 7.818522612298326e-05, "loss": 0.6499, "step": 3920 }, { "epoch": 0.7570959644719057, "grad_norm": 0.6715973615646362, "learning_rate": 7.817130646986764e-05, "loss": 0.6387, "step": 3921 }, { "epoch": 0.7572890519405291, "grad_norm": 0.6808881163597107, "learning_rate": 7.815738361720471e-05, "loss": 0.7059, "step": 3922 }, { "epoch": 0.7574821394091523, "grad_norm": 0.9055457711219788, "learning_rate": 7.814345756657579e-05, "loss": 0.6364, "step": 3923 }, { "epoch": 0.7576752268777757, "grad_norm": 0.9837268590927124, "learning_rate": 7.812952831956253e-05, "loss": 0.6483, "step": 3924 }, { "epoch": 0.7578683143463989, "grad_norm": 0.9223448634147644, "learning_rate": 7.811559587774693e-05, "loss": 0.7295, "step": 3925 }, { "epoch": 0.7580614018150222, "grad_norm": 0.6313437819480896, "learning_rate": 7.810166024271135e-05, "loss": 0.6717, "step": 3926 }, { "epoch": 0.7582544892836455, "grad_norm": 0.5614602565765381, "learning_rate": 7.808772141603855e-05, "loss": 0.704, "step": 3927 }, { "epoch": 0.7584475767522688, "grad_norm": 0.7869294881820679, "learning_rate": 7.807377939931159e-05, "loss": 0.6359, "step": 3928 }, { "epoch": 0.758640664220892, "grad_norm": 0.847221314907074, "learning_rate": 7.805983419411398e-05, "loss": 0.6522, "step": 3929 }, { "epoch": 0.7588337516895154, "grad_norm": 0.7361181378364563, "learning_rate": 7.804588580202952e-05, "loss": 0.6213, "step": 3930 }, { "epoch": 0.7590268391581386, "grad_norm": 0.700923502445221, "learning_rate": 7.803193422464237e-05, "loss": 0.6653, "step": 3931 }, { "epoch": 0.759219926626762, "grad_norm": 0.6028203368186951, "learning_rate": 7.801797946353713e-05, "loss": 0.5065, "step": 3932 }, { "epoch": 0.7594130140953852, "grad_norm": 0.7154591083526611, "learning_rate": 7.800402152029864e-05, "loss": 0.6533, "step": 3933 }, { "epoch": 0.7596061015640085, "grad_norm": 0.681208610534668, "learning_rate": 7.799006039651222e-05, "loss": 0.6332, "step": 3934 }, { "epoch": 0.7597991890326318, "grad_norm": 0.80063796043396, "learning_rate": 7.797609609376348e-05, "loss": 0.632, "step": 3935 }, { "epoch": 0.7599922765012551, "grad_norm": 0.6288535594940186, "learning_rate": 7.796212861363843e-05, "loss": 0.5695, "step": 3936 }, { "epoch": 0.7601853639698783, "grad_norm": 0.7549281716346741, "learning_rate": 7.794815795772339e-05, "loss": 0.5917, "step": 3937 }, { "epoch": 0.7603784514385017, "grad_norm": 2.0677120685577393, "learning_rate": 7.793418412760509e-05, "loss": 0.6915, "step": 3938 }, { "epoch": 0.7605715389071249, "grad_norm": 0.6010797619819641, "learning_rate": 7.792020712487063e-05, "loss": 0.6795, "step": 3939 }, { "epoch": 0.7607646263757483, "grad_norm": 1.4217034578323364, "learning_rate": 7.790622695110738e-05, "loss": 0.6185, "step": 3940 }, { "epoch": 0.7609577138443715, "grad_norm": 1.3216692209243774, "learning_rate": 7.78922436079032e-05, "loss": 0.6454, "step": 3941 }, { "epoch": 0.7611508013129948, "grad_norm": 0.624271035194397, "learning_rate": 7.787825709684622e-05, "loss": 0.6445, "step": 3942 }, { "epoch": 0.7613438887816181, "grad_norm": 1.3805906772613525, "learning_rate": 7.786426741952493e-05, "loss": 0.5876, "step": 3943 }, { "epoch": 0.7615369762502413, "grad_norm": 0.6861224174499512, "learning_rate": 7.785027457752823e-05, "loss": 0.6184, "step": 3944 }, { "epoch": 0.7617300637188646, "grad_norm": 3.1635005474090576, "learning_rate": 7.783627857244534e-05, "loss": 0.6488, "step": 3945 }, { "epoch": 0.7619231511874879, "grad_norm": 0.5442464351654053, "learning_rate": 7.782227940586586e-05, "loss": 0.7148, "step": 3946 }, { "epoch": 0.7621162386561112, "grad_norm": 0.5194402933120728, "learning_rate": 7.780827707937974e-05, "loss": 0.7219, "step": 3947 }, { "epoch": 0.7623093261247345, "grad_norm": 0.45636236667633057, "learning_rate": 7.779427159457728e-05, "loss": 0.6595, "step": 3948 }, { "epoch": 0.7625024135933578, "grad_norm": 0.8145459294319153, "learning_rate": 7.778026295304916e-05, "loss": 0.6422, "step": 3949 }, { "epoch": 0.762695501061981, "grad_norm": 0.7736799120903015, "learning_rate": 7.776625115638642e-05, "loss": 0.711, "step": 3950 }, { "epoch": 0.7628885885306044, "grad_norm": 0.5914008021354675, "learning_rate": 7.775223620618041e-05, "loss": 0.711, "step": 3951 }, { "epoch": 0.7630816759992276, "grad_norm": 0.6684074997901917, "learning_rate": 7.77382181040229e-05, "loss": 0.7149, "step": 3952 }, { "epoch": 0.763274763467851, "grad_norm": 0.7434825897216797, "learning_rate": 7.772419685150598e-05, "loss": 0.6901, "step": 3953 }, { "epoch": 0.7634678509364742, "grad_norm": 0.6527144908905029, "learning_rate": 7.771017245022211e-05, "loss": 0.6821, "step": 3954 }, { "epoch": 0.7636609384050975, "grad_norm": 0.7720116376876831, "learning_rate": 7.769614490176411e-05, "loss": 0.676, "step": 3955 }, { "epoch": 0.7638540258737208, "grad_norm": 0.6752985119819641, "learning_rate": 7.768211420772517e-05, "loss": 0.6739, "step": 3956 }, { "epoch": 0.7640471133423441, "grad_norm": 0.4861244261264801, "learning_rate": 7.766808036969879e-05, "loss": 0.685, "step": 3957 }, { "epoch": 0.7642402008109673, "grad_norm": 0.6236766576766968, "learning_rate": 7.765404338927889e-05, "loss": 0.6196, "step": 3958 }, { "epoch": 0.7644332882795907, "grad_norm": 0.7700256705284119, "learning_rate": 7.764000326805967e-05, "loss": 0.6283, "step": 3959 }, { "epoch": 0.7646263757482139, "grad_norm": 0.46730297803878784, "learning_rate": 7.762596000763578e-05, "loss": 0.6844, "step": 3960 }, { "epoch": 0.7648194632168372, "grad_norm": 0.5765610337257385, "learning_rate": 7.761191360960217e-05, "loss": 0.6007, "step": 3961 }, { "epoch": 0.7650125506854605, "grad_norm": 0.6909611225128174, "learning_rate": 7.759786407555413e-05, "loss": 0.6741, "step": 3962 }, { "epoch": 0.7652056381540838, "grad_norm": 0.8655089735984802, "learning_rate": 7.758381140708737e-05, "loss": 0.6602, "step": 3963 }, { "epoch": 0.765398725622707, "grad_norm": 0.5116157531738281, "learning_rate": 7.756975560579789e-05, "loss": 0.6335, "step": 3964 }, { "epoch": 0.7655918130913304, "grad_norm": 0.6216524839401245, "learning_rate": 7.755569667328208e-05, "loss": 0.6097, "step": 3965 }, { "epoch": 0.7657849005599536, "grad_norm": 0.7641874551773071, "learning_rate": 7.754163461113667e-05, "loss": 0.6564, "step": 3966 }, { "epoch": 0.765977988028577, "grad_norm": 0.6774432063102722, "learning_rate": 7.752756942095877e-05, "loss": 0.6987, "step": 3967 }, { "epoch": 0.7661710754972002, "grad_norm": 0.5993147492408752, "learning_rate": 7.75135011043458e-05, "loss": 0.6136, "step": 3968 }, { "epoch": 0.7663641629658235, "grad_norm": 0.48372966051101685, "learning_rate": 7.74994296628956e-05, "loss": 0.6665, "step": 3969 }, { "epoch": 0.7665572504344468, "grad_norm": 0.6283329725265503, "learning_rate": 7.748535509820634e-05, "loss": 0.6437, "step": 3970 }, { "epoch": 0.7667503379030701, "grad_norm": 0.5519633889198303, "learning_rate": 7.747127741187647e-05, "loss": 0.6797, "step": 3971 }, { "epoch": 0.7669434253716934, "grad_norm": 0.7892414331436157, "learning_rate": 7.745719660550492e-05, "loss": 0.6851, "step": 3972 }, { "epoch": 0.7671365128403167, "grad_norm": 1.054952621459961, "learning_rate": 7.74431126806909e-05, "loss": 0.6504, "step": 3973 }, { "epoch": 0.7673296003089399, "grad_norm": 7.154529094696045, "learning_rate": 7.742902563903397e-05, "loss": 0.6258, "step": 3974 }, { "epoch": 0.7675226877775633, "grad_norm": 0.8610357642173767, "learning_rate": 7.741493548213407e-05, "loss": 0.6194, "step": 3975 }, { "epoch": 0.7677157752461865, "grad_norm": 1.0978167057037354, "learning_rate": 7.74008422115915e-05, "loss": 0.6264, "step": 3976 }, { "epoch": 0.7679088627148098, "grad_norm": 0.6066487431526184, "learning_rate": 7.738674582900687e-05, "loss": 0.6664, "step": 3977 }, { "epoch": 0.7681019501834331, "grad_norm": 0.4933100640773773, "learning_rate": 7.737264633598119e-05, "loss": 0.6752, "step": 3978 }, { "epoch": 0.7682950376520564, "grad_norm": 0.6874785423278809, "learning_rate": 7.73585437341158e-05, "loss": 0.6518, "step": 3979 }, { "epoch": 0.7684881251206797, "grad_norm": 0.9343470931053162, "learning_rate": 7.734443802501239e-05, "loss": 0.6807, "step": 3980 }, { "epoch": 0.768681212589303, "grad_norm": 0.628801703453064, "learning_rate": 7.733032921027305e-05, "loss": 0.6895, "step": 3981 }, { "epoch": 0.7688743000579262, "grad_norm": 0.6821556687355042, "learning_rate": 7.731621729150013e-05, "loss": 0.6434, "step": 3982 }, { "epoch": 0.7690673875265496, "grad_norm": 0.5855782628059387, "learning_rate": 7.730210227029641e-05, "loss": 0.6532, "step": 3983 }, { "epoch": 0.7692604749951728, "grad_norm": 0.9220808744430542, "learning_rate": 7.728798414826503e-05, "loss": 0.6973, "step": 3984 }, { "epoch": 0.7694535624637961, "grad_norm": 1.1160304546356201, "learning_rate": 7.727386292700942e-05, "loss": 0.6605, "step": 3985 }, { "epoch": 0.7696466499324194, "grad_norm": 0.6902434229850769, "learning_rate": 7.725973860813338e-05, "loss": 0.6517, "step": 3986 }, { "epoch": 0.7698397374010427, "grad_norm": 0.5367721319198608, "learning_rate": 7.72456111932411e-05, "loss": 0.6342, "step": 3987 }, { "epoch": 0.770032824869666, "grad_norm": 0.7773317694664001, "learning_rate": 7.723148068393709e-05, "loss": 0.6536, "step": 3988 }, { "epoch": 0.7702259123382893, "grad_norm": 0.6209920048713684, "learning_rate": 7.721734708182621e-05, "loss": 0.6555, "step": 3989 }, { "epoch": 0.7704189998069125, "grad_norm": 0.7650394439697266, "learning_rate": 7.72032103885137e-05, "loss": 0.6574, "step": 3990 }, { "epoch": 0.7706120872755359, "grad_norm": 1.7784404754638672, "learning_rate": 7.718907060560511e-05, "loss": 0.7321, "step": 3991 }, { "epoch": 0.7708051747441591, "grad_norm": 0.6026117205619812, "learning_rate": 7.717492773470638e-05, "loss": 0.5934, "step": 3992 }, { "epoch": 0.7709982622127823, "grad_norm": 0.5299949049949646, "learning_rate": 7.716078177742376e-05, "loss": 0.7135, "step": 3993 }, { "epoch": 0.7711913496814057, "grad_norm": 0.646794855594635, "learning_rate": 7.714663273536388e-05, "loss": 0.5992, "step": 3994 }, { "epoch": 0.7713844371500289, "grad_norm": 0.7647431492805481, "learning_rate": 7.713248061013371e-05, "loss": 0.5722, "step": 3995 }, { "epoch": 0.7715775246186523, "grad_norm": 0.9105339646339417, "learning_rate": 7.71183254033406e-05, "loss": 0.6431, "step": 3996 }, { "epoch": 0.7717706120872755, "grad_norm": 1.2105504274368286, "learning_rate": 7.710416711659218e-05, "loss": 0.701, "step": 3997 }, { "epoch": 0.7719636995558988, "grad_norm": 1.4540072679519653, "learning_rate": 7.709000575149652e-05, "loss": 0.6222, "step": 3998 }, { "epoch": 0.7721567870245221, "grad_norm": 0.7419961094856262, "learning_rate": 7.707584130966194e-05, "loss": 0.6785, "step": 3999 }, { "epoch": 0.7723498744931454, "grad_norm": 0.9541937112808228, "learning_rate": 7.706167379269721e-05, "loss": 0.7004, "step": 4000 }, { "epoch": 0.7723498744931454, "eval_loss": 0.685865044593811, "eval_runtime": 49.5236, "eval_samples_per_second": 13.408, "eval_steps_per_second": 0.424, "step": 4000 }, { "epoch": 0.7725429619617686, "grad_norm": 1.2210779190063477, "learning_rate": 7.704750320221138e-05, "loss": 0.6042, "step": 4001 }, { "epoch": 0.772736049430392, "grad_norm": 0.5986214876174927, "learning_rate": 7.703332953981386e-05, "loss": 0.6443, "step": 4002 }, { "epoch": 0.7729291368990152, "grad_norm": 0.700710117816925, "learning_rate": 7.701915280711443e-05, "loss": 0.7257, "step": 4003 }, { "epoch": 0.7731222243676386, "grad_norm": 0.9541746973991394, "learning_rate": 7.700497300572323e-05, "loss": 0.5865, "step": 4004 }, { "epoch": 0.7733153118362618, "grad_norm": 0.6403926014900208, "learning_rate": 7.699079013725069e-05, "loss": 0.5844, "step": 4005 }, { "epoch": 0.7735083993048851, "grad_norm": 0.5479387044906616, "learning_rate": 7.697660420330764e-05, "loss": 0.6674, "step": 4006 }, { "epoch": 0.7737014867735084, "grad_norm": 0.9483046531677246, "learning_rate": 7.696241520550526e-05, "loss": 0.6745, "step": 4007 }, { "epoch": 0.7738945742421317, "grad_norm": 0.8949880599975586, "learning_rate": 7.694822314545502e-05, "loss": 0.5928, "step": 4008 }, { "epoch": 0.7740876617107549, "grad_norm": 0.8373573422431946, "learning_rate": 7.693402802476882e-05, "loss": 0.6635, "step": 4009 }, { "epoch": 0.7742807491793783, "grad_norm": 0.970200777053833, "learning_rate": 7.691982984505884e-05, "loss": 0.6358, "step": 4010 }, { "epoch": 0.7744738366480015, "grad_norm": 1.1959518194198608, "learning_rate": 7.690562860793764e-05, "loss": 0.5862, "step": 4011 }, { "epoch": 0.7746669241166249, "grad_norm": 1.5056474208831787, "learning_rate": 7.689142431501812e-05, "loss": 0.6083, "step": 4012 }, { "epoch": 0.7748600115852481, "grad_norm": 0.8045438528060913, "learning_rate": 7.687721696791354e-05, "loss": 0.6706, "step": 4013 }, { "epoch": 0.7750530990538714, "grad_norm": 0.5758229494094849, "learning_rate": 7.68630065682375e-05, "loss": 0.6419, "step": 4014 }, { "epoch": 0.7752461865224947, "grad_norm": 0.6886419057846069, "learning_rate": 7.684879311760393e-05, "loss": 0.59, "step": 4015 }, { "epoch": 0.775439273991118, "grad_norm": 1.9350941181182861, "learning_rate": 7.68345766176271e-05, "loss": 0.6256, "step": 4016 }, { "epoch": 0.7756323614597412, "grad_norm": 0.8406371474266052, "learning_rate": 7.682035706992167e-05, "loss": 0.6341, "step": 4017 }, { "epoch": 0.7758254489283646, "grad_norm": 0.7735614776611328, "learning_rate": 7.680613447610261e-05, "loss": 0.6646, "step": 4018 }, { "epoch": 0.7760185363969878, "grad_norm": 0.8615487813949585, "learning_rate": 7.679190883778524e-05, "loss": 0.6597, "step": 4019 }, { "epoch": 0.7762116238656112, "grad_norm": 0.8593424558639526, "learning_rate": 7.677768015658524e-05, "loss": 0.616, "step": 4020 }, { "epoch": 0.7764047113342344, "grad_norm": 0.5942689776420593, "learning_rate": 7.676344843411867e-05, "loss": 0.6307, "step": 4021 }, { "epoch": 0.7765977988028577, "grad_norm": 0.944916307926178, "learning_rate": 7.674921367200181e-05, "loss": 0.6102, "step": 4022 }, { "epoch": 0.776790886271481, "grad_norm": 0.7387428283691406, "learning_rate": 7.673497587185141e-05, "loss": 0.6682, "step": 4023 }, { "epoch": 0.7769839737401043, "grad_norm": 0.7461642622947693, "learning_rate": 7.672073503528455e-05, "loss": 0.6597, "step": 4024 }, { "epoch": 0.7771770612087275, "grad_norm": 1.014835238456726, "learning_rate": 7.67064911639186e-05, "loss": 0.7002, "step": 4025 }, { "epoch": 0.7773701486773509, "grad_norm": 0.7791910767555237, "learning_rate": 7.669224425937129e-05, "loss": 0.623, "step": 4026 }, { "epoch": 0.7775632361459741, "grad_norm": 1.3077476024627686, "learning_rate": 7.667799432326073e-05, "loss": 0.617, "step": 4027 }, { "epoch": 0.7777563236145975, "grad_norm": 0.7300556898117065, "learning_rate": 7.666374135720536e-05, "loss": 0.6565, "step": 4028 }, { "epoch": 0.7779494110832207, "grad_norm": 2.345555543899536, "learning_rate": 7.664948536282392e-05, "loss": 0.6544, "step": 4029 }, { "epoch": 0.778142498551844, "grad_norm": 0.7371547818183899, "learning_rate": 7.663522634173558e-05, "loss": 0.6454, "step": 4030 }, { "epoch": 0.7783355860204673, "grad_norm": 0.8927733898162842, "learning_rate": 7.662096429555977e-05, "loss": 0.6642, "step": 4031 }, { "epoch": 0.7785286734890906, "grad_norm": 0.7035649418830872, "learning_rate": 7.660669922591628e-05, "loss": 0.6527, "step": 4032 }, { "epoch": 0.7787217609577138, "grad_norm": 0.6580469012260437, "learning_rate": 7.659243113442531e-05, "loss": 0.6012, "step": 4033 }, { "epoch": 0.7789148484263372, "grad_norm": 2.9548768997192383, "learning_rate": 7.657816002270731e-05, "loss": 0.6441, "step": 4034 }, { "epoch": 0.7791079358949604, "grad_norm": 1.4107897281646729, "learning_rate": 7.656388589238315e-05, "loss": 0.6512, "step": 4035 }, { "epoch": 0.7793010233635838, "grad_norm": 1.3769627809524536, "learning_rate": 7.6549608745074e-05, "loss": 0.6386, "step": 4036 }, { "epoch": 0.779494110832207, "grad_norm": 1.2300310134887695, "learning_rate": 7.653532858240136e-05, "loss": 0.615, "step": 4037 }, { "epoch": 0.7796871983008303, "grad_norm": 5.418915748596191, "learning_rate": 7.652104540598712e-05, "loss": 0.7088, "step": 4038 }, { "epoch": 0.7798802857694536, "grad_norm": 1.3115836381912231, "learning_rate": 7.650675921745349e-05, "loss": 0.6657, "step": 4039 }, { "epoch": 0.7800733732380768, "grad_norm": 2.905822992324829, "learning_rate": 7.649247001842301e-05, "loss": 0.5742, "step": 4040 }, { "epoch": 0.7802664607067001, "grad_norm": 0.8295304775238037, "learning_rate": 7.647817781051856e-05, "loss": 0.7131, "step": 4041 }, { "epoch": 0.7804595481753234, "grad_norm": 0.79970383644104, "learning_rate": 7.646388259536339e-05, "loss": 0.7005, "step": 4042 }, { "epoch": 0.7806526356439467, "grad_norm": 0.7948803305625916, "learning_rate": 7.644958437458107e-05, "loss": 0.6884, "step": 4043 }, { "epoch": 0.78084572311257, "grad_norm": 1.4512962102890015, "learning_rate": 7.643528314979552e-05, "loss": 0.6958, "step": 4044 }, { "epoch": 0.7810388105811933, "grad_norm": 0.7761714458465576, "learning_rate": 7.642097892263098e-05, "loss": 0.6441, "step": 4045 }, { "epoch": 0.7812318980498165, "grad_norm": 0.774498701095581, "learning_rate": 7.640667169471206e-05, "loss": 0.6315, "step": 4046 }, { "epoch": 0.7814249855184399, "grad_norm": 0.8677799701690674, "learning_rate": 7.639236146766371e-05, "loss": 0.6276, "step": 4047 }, { "epoch": 0.7816180729870631, "grad_norm": 0.7454348802566528, "learning_rate": 7.637804824311119e-05, "loss": 0.6173, "step": 4048 }, { "epoch": 0.7818111604556864, "grad_norm": 0.979385256767273, "learning_rate": 7.636373202268013e-05, "loss": 0.6811, "step": 4049 }, { "epoch": 0.7820042479243097, "grad_norm": 1.2413138151168823, "learning_rate": 7.63494128079965e-05, "loss": 0.6412, "step": 4050 }, { "epoch": 0.782197335392933, "grad_norm": 0.7751598954200745, "learning_rate": 7.633509060068656e-05, "loss": 0.5852, "step": 4051 }, { "epoch": 0.7823904228615562, "grad_norm": 1.45693039894104, "learning_rate": 7.6320765402377e-05, "loss": 0.6654, "step": 4052 }, { "epoch": 0.7825835103301796, "grad_norm": 1.7009038925170898, "learning_rate": 7.63064372146948e-05, "loss": 0.5784, "step": 4053 }, { "epoch": 0.7827765977988028, "grad_norm": 1.4041577577590942, "learning_rate": 7.629210603926722e-05, "loss": 0.6509, "step": 4054 }, { "epoch": 0.7829696852674262, "grad_norm": 0.9490203261375427, "learning_rate": 7.627777187772196e-05, "loss": 0.6505, "step": 4055 }, { "epoch": 0.7831627727360494, "grad_norm": 0.8074515461921692, "learning_rate": 7.626343473168705e-05, "loss": 0.6688, "step": 4056 }, { "epoch": 0.7833558602046727, "grad_norm": 0.9738737344741821, "learning_rate": 7.624909460279075e-05, "loss": 0.6009, "step": 4057 }, { "epoch": 0.783548947673296, "grad_norm": 0.7930698990821838, "learning_rate": 7.62347514926618e-05, "loss": 0.6391, "step": 4058 }, { "epoch": 0.7837420351419193, "grad_norm": 1.0418415069580078, "learning_rate": 7.622040540292919e-05, "loss": 0.6413, "step": 4059 }, { "epoch": 0.7839351226105425, "grad_norm": 1.1063523292541504, "learning_rate": 7.620605633522226e-05, "loss": 0.6143, "step": 4060 }, { "epoch": 0.7841282100791659, "grad_norm": 1.1253175735473633, "learning_rate": 7.619170429117076e-05, "loss": 0.6885, "step": 4061 }, { "epoch": 0.7843212975477891, "grad_norm": 1.290361762046814, "learning_rate": 7.617734927240462e-05, "loss": 0.6149, "step": 4062 }, { "epoch": 0.7845143850164125, "grad_norm": 0.7055377960205078, "learning_rate": 7.616299128055428e-05, "loss": 0.6294, "step": 4063 }, { "epoch": 0.7847074724850357, "grad_norm": 1.0956268310546875, "learning_rate": 7.614863031725044e-05, "loss": 0.7104, "step": 4064 }, { "epoch": 0.784900559953659, "grad_norm": 0.878525972366333, "learning_rate": 7.613426638412411e-05, "loss": 0.6209, "step": 4065 }, { "epoch": 0.7850936474222823, "grad_norm": 0.6189517974853516, "learning_rate": 7.611989948280669e-05, "loss": 0.6868, "step": 4066 }, { "epoch": 0.7852867348909056, "grad_norm": 1.1770038604736328, "learning_rate": 7.61055296149299e-05, "loss": 0.6638, "step": 4067 }, { "epoch": 0.7854798223595288, "grad_norm": 0.9903409481048584, "learning_rate": 7.609115678212578e-05, "loss": 0.6118, "step": 4068 }, { "epoch": 0.7856729098281522, "grad_norm": 0.9836801886558533, "learning_rate": 7.607678098602673e-05, "loss": 0.655, "step": 4069 }, { "epoch": 0.7858659972967754, "grad_norm": 0.8127025365829468, "learning_rate": 7.606240222826547e-05, "loss": 0.6843, "step": 4070 }, { "epoch": 0.7860590847653988, "grad_norm": 0.6324265599250793, "learning_rate": 7.604802051047506e-05, "loss": 0.6066, "step": 4071 }, { "epoch": 0.786252172234022, "grad_norm": 1.2149133682250977, "learning_rate": 7.60336358342889e-05, "loss": 0.6277, "step": 4072 }, { "epoch": 0.7864452597026453, "grad_norm": 2.4866621494293213, "learning_rate": 7.601924820134074e-05, "loss": 0.6225, "step": 4073 }, { "epoch": 0.7866383471712686, "grad_norm": 0.9421775937080383, "learning_rate": 7.600485761326465e-05, "loss": 0.6528, "step": 4074 }, { "epoch": 0.7868314346398919, "grad_norm": 0.6462237238883972, "learning_rate": 7.599046407169501e-05, "loss": 0.6942, "step": 4075 }, { "epoch": 0.7870245221085151, "grad_norm": 1.6425995826721191, "learning_rate": 7.597606757826658e-05, "loss": 0.6114, "step": 4076 }, { "epoch": 0.7872176095771385, "grad_norm": 0.8801589608192444, "learning_rate": 7.596166813461445e-05, "loss": 0.5963, "step": 4077 }, { "epoch": 0.7874106970457617, "grad_norm": 0.951812207698822, "learning_rate": 7.594726574237401e-05, "loss": 0.6665, "step": 4078 }, { "epoch": 0.7876037845143851, "grad_norm": 0.9250040650367737, "learning_rate": 7.593286040318101e-05, "loss": 0.6343, "step": 4079 }, { "epoch": 0.7877968719830083, "grad_norm": 1.6583236455917358, "learning_rate": 7.591845211867155e-05, "loss": 0.6182, "step": 4080 }, { "epoch": 0.7879899594516316, "grad_norm": 8.107901573181152, "learning_rate": 7.590404089048202e-05, "loss": 0.6003, "step": 4081 }, { "epoch": 0.7881830469202549, "grad_norm": 0.6343777775764465, "learning_rate": 7.58896267202492e-05, "loss": 0.6446, "step": 4082 }, { "epoch": 0.7883761343888782, "grad_norm": 0.6708535552024841, "learning_rate": 7.587520960961015e-05, "loss": 0.6877, "step": 4083 }, { "epoch": 0.7885692218575014, "grad_norm": 1.0220602750778198, "learning_rate": 7.586078956020229e-05, "loss": 0.6579, "step": 4084 }, { "epoch": 0.7887623093261248, "grad_norm": 1.827675223350525, "learning_rate": 7.58463665736634e-05, "loss": 0.6559, "step": 4085 }, { "epoch": 0.788955396794748, "grad_norm": 0.6020605564117432, "learning_rate": 7.583194065163152e-05, "loss": 0.6399, "step": 4086 }, { "epoch": 0.7891484842633713, "grad_norm": 0.6050816774368286, "learning_rate": 7.581751179574512e-05, "loss": 0.6922, "step": 4087 }, { "epoch": 0.7893415717319946, "grad_norm": 0.7618637084960938, "learning_rate": 7.580308000764292e-05, "loss": 0.5728, "step": 4088 }, { "epoch": 0.7895346592006178, "grad_norm": 0.6368786692619324, "learning_rate": 7.578864528896401e-05, "loss": 0.5927, "step": 4089 }, { "epoch": 0.7897277466692412, "grad_norm": 0.6647925972938538, "learning_rate": 7.577420764134782e-05, "loss": 0.7019, "step": 4090 }, { "epoch": 0.7899208341378644, "grad_norm": 0.712314248085022, "learning_rate": 7.575976706643411e-05, "loss": 0.652, "step": 4091 }, { "epoch": 0.7901139216064877, "grad_norm": 1.5358364582061768, "learning_rate": 7.574532356586292e-05, "loss": 0.6588, "step": 4092 }, { "epoch": 0.790307009075111, "grad_norm": 0.7705363631248474, "learning_rate": 7.573087714127472e-05, "loss": 0.7212, "step": 4093 }, { "epoch": 0.7905000965437343, "grad_norm": 0.9488884210586548, "learning_rate": 7.571642779431021e-05, "loss": 0.6456, "step": 4094 }, { "epoch": 0.7906931840123576, "grad_norm": 0.6182003617286682, "learning_rate": 7.570197552661051e-05, "loss": 0.6524, "step": 4095 }, { "epoch": 0.7908862714809809, "grad_norm": 0.745516300201416, "learning_rate": 7.5687520339817e-05, "loss": 0.7597, "step": 4096 }, { "epoch": 0.7910793589496041, "grad_norm": 0.9183629155158997, "learning_rate": 7.567306223557145e-05, "loss": 0.6118, "step": 4097 }, { "epoch": 0.7912724464182275, "grad_norm": 0.9499119520187378, "learning_rate": 7.565860121551593e-05, "loss": 0.6209, "step": 4098 }, { "epoch": 0.7914655338868507, "grad_norm": 3.546523332595825, "learning_rate": 7.564413728129283e-05, "loss": 0.6202, "step": 4099 }, { "epoch": 0.791658621355474, "grad_norm": 0.6339021921157837, "learning_rate": 7.56296704345449e-05, "loss": 0.5542, "step": 4100 }, { "epoch": 0.7918517088240973, "grad_norm": 5.145658016204834, "learning_rate": 7.561520067691519e-05, "loss": 0.6728, "step": 4101 }, { "epoch": 0.7920447962927206, "grad_norm": 0.6941596269607544, "learning_rate": 7.560072801004711e-05, "loss": 0.6076, "step": 4102 }, { "epoch": 0.7922378837613439, "grad_norm": 0.825212299823761, "learning_rate": 7.558625243558442e-05, "loss": 0.5993, "step": 4103 }, { "epoch": 0.7924309712299672, "grad_norm": 0.8518916368484497, "learning_rate": 7.557177395517112e-05, "loss": 0.6614, "step": 4104 }, { "epoch": 0.7926240586985904, "grad_norm": 0.9331488609313965, "learning_rate": 7.555729257045164e-05, "loss": 0.6136, "step": 4105 }, { "epoch": 0.7928171461672138, "grad_norm": 0.9886948466300964, "learning_rate": 7.554280828307066e-05, "loss": 0.6439, "step": 4106 }, { "epoch": 0.793010233635837, "grad_norm": 0.8995814323425293, "learning_rate": 7.55283210946733e-05, "loss": 0.6937, "step": 4107 }, { "epoch": 0.7932033211044603, "grad_norm": 1.0078251361846924, "learning_rate": 7.551383100690484e-05, "loss": 0.599, "step": 4108 }, { "epoch": 0.7933964085730836, "grad_norm": 1.2041722536087036, "learning_rate": 7.549933802141108e-05, "loss": 0.6533, "step": 4109 }, { "epoch": 0.7935894960417069, "grad_norm": 1.0306587219238281, "learning_rate": 7.548484213983799e-05, "loss": 0.6363, "step": 4110 }, { "epoch": 0.7937825835103302, "grad_norm": 0.8399631977081299, "learning_rate": 7.547034336383199e-05, "loss": 0.6591, "step": 4111 }, { "epoch": 0.7939756709789535, "grad_norm": 1.5533674955368042, "learning_rate": 7.545584169503972e-05, "loss": 0.6997, "step": 4112 }, { "epoch": 0.7941687584475767, "grad_norm": 0.9032691717147827, "learning_rate": 7.544133713510824e-05, "loss": 0.6329, "step": 4113 }, { "epoch": 0.7943618459162001, "grad_norm": 1.0118567943572998, "learning_rate": 7.542682968568488e-05, "loss": 0.5965, "step": 4114 }, { "epoch": 0.7945549333848233, "grad_norm": 0.7468696236610413, "learning_rate": 7.541231934841733e-05, "loss": 0.6671, "step": 4115 }, { "epoch": 0.7947480208534466, "grad_norm": 0.5778937935829163, "learning_rate": 7.53978061249536e-05, "loss": 0.6173, "step": 4116 }, { "epoch": 0.7949411083220699, "grad_norm": 2.1449198722839355, "learning_rate": 7.5383290016942e-05, "loss": 0.7688, "step": 4117 }, { "epoch": 0.7951341957906932, "grad_norm": 0.5502179265022278, "learning_rate": 7.536877102603122e-05, "loss": 0.6747, "step": 4118 }, { "epoch": 0.7953272832593165, "grad_norm": 0.7823625206947327, "learning_rate": 7.535424915387025e-05, "loss": 0.5553, "step": 4119 }, { "epoch": 0.7955203707279398, "grad_norm": 1.6360957622528076, "learning_rate": 7.533972440210839e-05, "loss": 0.6297, "step": 4120 }, { "epoch": 0.795713458196563, "grad_norm": 0.69476318359375, "learning_rate": 7.532519677239531e-05, "loss": 0.6073, "step": 4121 }, { "epoch": 0.7959065456651864, "grad_norm": 0.9801367521286011, "learning_rate": 7.531066626638094e-05, "loss": 0.6818, "step": 4122 }, { "epoch": 0.7960996331338096, "grad_norm": 1.0683016777038574, "learning_rate": 7.529613288571562e-05, "loss": 0.6244, "step": 4123 }, { "epoch": 0.796292720602433, "grad_norm": 0.8641615509986877, "learning_rate": 7.528159663204994e-05, "loss": 0.6605, "step": 4124 }, { "epoch": 0.7964858080710562, "grad_norm": 0.7615433931350708, "learning_rate": 7.526705750703487e-05, "loss": 0.6435, "step": 4125 }, { "epoch": 0.7966788955396795, "grad_norm": 0.6945052146911621, "learning_rate": 7.525251551232169e-05, "loss": 0.6547, "step": 4126 }, { "epoch": 0.7968719830083028, "grad_norm": 0.7268550992012024, "learning_rate": 7.5237970649562e-05, "loss": 0.5849, "step": 4127 }, { "epoch": 0.7970650704769261, "grad_norm": 0.8030416965484619, "learning_rate": 7.522342292040769e-05, "loss": 0.616, "step": 4128 }, { "epoch": 0.7972581579455493, "grad_norm": 0.7732712030410767, "learning_rate": 7.520887232651108e-05, "loss": 0.6318, "step": 4129 }, { "epoch": 0.7974512454141727, "grad_norm": 0.6485053896903992, "learning_rate": 7.519431886952471e-05, "loss": 0.6588, "step": 4130 }, { "epoch": 0.7976443328827959, "grad_norm": 1.1477679014205933, "learning_rate": 7.517976255110148e-05, "loss": 0.6964, "step": 4131 }, { "epoch": 0.7978374203514192, "grad_norm": 3.794727325439453, "learning_rate": 7.516520337289463e-05, "loss": 0.692, "step": 4132 }, { "epoch": 0.7980305078200425, "grad_norm": 0.9906985759735107, "learning_rate": 7.515064133655771e-05, "loss": 0.6655, "step": 4133 }, { "epoch": 0.7982235952886657, "grad_norm": 1.1028532981872559, "learning_rate": 7.513607644374462e-05, "loss": 0.7072, "step": 4134 }, { "epoch": 0.798416682757289, "grad_norm": 0.6433757543563843, "learning_rate": 7.512150869610952e-05, "loss": 0.6779, "step": 4135 }, { "epoch": 0.7986097702259123, "grad_norm": 0.7148146629333496, "learning_rate": 7.510693809530698e-05, "loss": 0.6359, "step": 4136 }, { "epoch": 0.7988028576945356, "grad_norm": 0.8302411437034607, "learning_rate": 7.509236464299185e-05, "loss": 0.6764, "step": 4137 }, { "epoch": 0.7989959451631589, "grad_norm": 1.1344341039657593, "learning_rate": 7.507778834081926e-05, "loss": 0.7106, "step": 4138 }, { "epoch": 0.7991890326317822, "grad_norm": 0.9156889915466309, "learning_rate": 7.506320919044475e-05, "loss": 0.6553, "step": 4139 }, { "epoch": 0.7993821201004054, "grad_norm": 1.5060389041900635, "learning_rate": 7.504862719352413e-05, "loss": 0.7003, "step": 4140 }, { "epoch": 0.7995752075690288, "grad_norm": 0.7137902975082397, "learning_rate": 7.503404235171355e-05, "loss": 0.725, "step": 4141 }, { "epoch": 0.799768295037652, "grad_norm": 0.6819602251052856, "learning_rate": 7.501945466666946e-05, "loss": 0.6405, "step": 4142 }, { "epoch": 0.7999613825062754, "grad_norm": 0.8198065757751465, "learning_rate": 7.500486414004868e-05, "loss": 0.6771, "step": 4143 }, { "epoch": 0.8001544699748986, "grad_norm": 1.0442676544189453, "learning_rate": 7.499027077350831e-05, "loss": 0.6203, "step": 4144 }, { "epoch": 0.8003475574435219, "grad_norm": 0.7764274477958679, "learning_rate": 7.49756745687058e-05, "loss": 0.6713, "step": 4145 }, { "epoch": 0.8005406449121452, "grad_norm": 0.9224888682365417, "learning_rate": 7.496107552729887e-05, "loss": 0.6135, "step": 4146 }, { "epoch": 0.8007337323807685, "grad_norm": 1.118625283241272, "learning_rate": 7.494647365094565e-05, "loss": 0.6735, "step": 4147 }, { "epoch": 0.8009268198493917, "grad_norm": 0.8678269386291504, "learning_rate": 7.493186894130452e-05, "loss": 0.6592, "step": 4148 }, { "epoch": 0.8011199073180151, "grad_norm": 1.0021724700927734, "learning_rate": 7.491726140003421e-05, "loss": 0.6874, "step": 4149 }, { "epoch": 0.8013129947866383, "grad_norm": 1.380368947982788, "learning_rate": 7.490265102879376e-05, "loss": 0.6422, "step": 4150 }, { "epoch": 0.8015060822552617, "grad_norm": 1.0444480180740356, "learning_rate": 7.488803782924255e-05, "loss": 0.69, "step": 4151 }, { "epoch": 0.8016991697238849, "grad_norm": 1.2099263668060303, "learning_rate": 7.487342180304025e-05, "loss": 0.6307, "step": 4152 }, { "epoch": 0.8018922571925082, "grad_norm": 1.1048146486282349, "learning_rate": 7.485880295184689e-05, "loss": 0.6459, "step": 4153 }, { "epoch": 0.8020853446611315, "grad_norm": 1.278106927871704, "learning_rate": 7.48441812773228e-05, "loss": 0.7072, "step": 4154 }, { "epoch": 0.8022784321297548, "grad_norm": 1.050815224647522, "learning_rate": 7.482955678112862e-05, "loss": 0.6797, "step": 4155 }, { "epoch": 0.802471519598378, "grad_norm": 2.103304624557495, "learning_rate": 7.481492946492535e-05, "loss": 0.6081, "step": 4156 }, { "epoch": 0.8026646070670014, "grad_norm": 2.277024745941162, "learning_rate": 7.480029933037422e-05, "loss": 0.7046, "step": 4157 }, { "epoch": 0.8028576945356246, "grad_norm": 0.9403090476989746, "learning_rate": 7.478566637913693e-05, "loss": 0.6149, "step": 4158 }, { "epoch": 0.803050782004248, "grad_norm": 0.7505311965942383, "learning_rate": 7.477103061287535e-05, "loss": 0.6055, "step": 4159 }, { "epoch": 0.8032438694728712, "grad_norm": 1.8041011095046997, "learning_rate": 7.475639203325174e-05, "loss": 0.686, "step": 4160 }, { "epoch": 0.8034369569414945, "grad_norm": 1.4387061595916748, "learning_rate": 7.47417506419287e-05, "loss": 0.6381, "step": 4161 }, { "epoch": 0.8036300444101178, "grad_norm": 1.2809780836105347, "learning_rate": 7.47271064405691e-05, "loss": 0.6298, "step": 4162 }, { "epoch": 0.8038231318787411, "grad_norm": 1.8354158401489258, "learning_rate": 7.471245943083615e-05, "loss": 0.7097, "step": 4163 }, { "epoch": 0.8040162193473643, "grad_norm": 1.7634403705596924, "learning_rate": 7.469780961439338e-05, "loss": 0.6065, "step": 4164 }, { "epoch": 0.8042093068159877, "grad_norm": 1.0212461948394775, "learning_rate": 7.468315699290464e-05, "loss": 0.6983, "step": 4165 }, { "epoch": 0.8044023942846109, "grad_norm": 2.197979688644409, "learning_rate": 7.466850156803413e-05, "loss": 0.6628, "step": 4166 }, { "epoch": 0.8045954817532343, "grad_norm": 0.9406061768531799, "learning_rate": 7.465384334144628e-05, "loss": 0.598, "step": 4167 }, { "epoch": 0.8047885692218575, "grad_norm": 1.3129740953445435, "learning_rate": 7.463918231480593e-05, "loss": 0.6402, "step": 4168 }, { "epoch": 0.8049816566904808, "grad_norm": 1.2848539352416992, "learning_rate": 7.462451848977818e-05, "loss": 0.5783, "step": 4169 }, { "epoch": 0.8051747441591041, "grad_norm": 1.033502459526062, "learning_rate": 7.460985186802849e-05, "loss": 0.643, "step": 4170 }, { "epoch": 0.8053678316277274, "grad_norm": 1.6222014427185059, "learning_rate": 7.459518245122259e-05, "loss": 0.6828, "step": 4171 }, { "epoch": 0.8055609190963506, "grad_norm": 1.6600013971328735, "learning_rate": 7.458051024102658e-05, "loss": 0.6111, "step": 4172 }, { "epoch": 0.805754006564974, "grad_norm": 1.5799659490585327, "learning_rate": 7.456583523910684e-05, "loss": 0.6791, "step": 4173 }, { "epoch": 0.8059470940335972, "grad_norm": 1.716021180152893, "learning_rate": 7.455115744713008e-05, "loss": 0.5786, "step": 4174 }, { "epoch": 0.8061401815022206, "grad_norm": 1.8844385147094727, "learning_rate": 7.453647686676331e-05, "loss": 0.5939, "step": 4175 }, { "epoch": 0.8063332689708438, "grad_norm": 1.2665785551071167, "learning_rate": 7.452179349967393e-05, "loss": 0.6216, "step": 4176 }, { "epoch": 0.8065263564394671, "grad_norm": 1.6955323219299316, "learning_rate": 7.450710734752953e-05, "loss": 0.6195, "step": 4177 }, { "epoch": 0.8067194439080904, "grad_norm": 2.5829076766967773, "learning_rate": 7.449241841199811e-05, "loss": 0.6146, "step": 4178 }, { "epoch": 0.8069125313767137, "grad_norm": 1.3006893396377563, "learning_rate": 7.447772669474797e-05, "loss": 0.6363, "step": 4179 }, { "epoch": 0.8071056188453369, "grad_norm": 1.5276693105697632, "learning_rate": 7.446303219744772e-05, "loss": 0.6931, "step": 4180 }, { "epoch": 0.8072987063139603, "grad_norm": 1.1677582263946533, "learning_rate": 7.444833492176625e-05, "loss": 0.6244, "step": 4181 }, { "epoch": 0.8074917937825835, "grad_norm": 1.752074956893921, "learning_rate": 7.443363486937285e-05, "loss": 0.6781, "step": 4182 }, { "epoch": 0.8076848812512067, "grad_norm": 1.1263415813446045, "learning_rate": 7.441893204193704e-05, "loss": 0.6893, "step": 4183 }, { "epoch": 0.8078779687198301, "grad_norm": 3.4943196773529053, "learning_rate": 7.44042264411287e-05, "loss": 0.6636, "step": 4184 }, { "epoch": 0.8080710561884533, "grad_norm": 3.670501708984375, "learning_rate": 7.438951806861799e-05, "loss": 0.6496, "step": 4185 }, { "epoch": 0.8082641436570767, "grad_norm": 1.3703073263168335, "learning_rate": 7.437480692607544e-05, "loss": 0.5925, "step": 4186 }, { "epoch": 0.8084572311256999, "grad_norm": 0.990421712398529, "learning_rate": 7.436009301517186e-05, "loss": 0.6852, "step": 4187 }, { "epoch": 0.8086503185943232, "grad_norm": 0.9924529790878296, "learning_rate": 7.434537633757835e-05, "loss": 0.6527, "step": 4188 }, { "epoch": 0.8088434060629465, "grad_norm": 1.2486414909362793, "learning_rate": 7.433065689496638e-05, "loss": 0.616, "step": 4189 }, { "epoch": 0.8090364935315698, "grad_norm": 1.0286192893981934, "learning_rate": 7.43159346890077e-05, "loss": 0.6339, "step": 4190 }, { "epoch": 0.809229581000193, "grad_norm": 0.9854729175567627, "learning_rate": 7.430120972137437e-05, "loss": 0.6398, "step": 4191 }, { "epoch": 0.8094226684688164, "grad_norm": 1.6350826025009155, "learning_rate": 7.428648199373879e-05, "loss": 0.628, "step": 4192 }, { "epoch": 0.8096157559374396, "grad_norm": 1.4840149879455566, "learning_rate": 7.427175150777366e-05, "loss": 0.6319, "step": 4193 }, { "epoch": 0.809808843406063, "grad_norm": 1.6076087951660156, "learning_rate": 7.425701826515195e-05, "loss": 0.6566, "step": 4194 }, { "epoch": 0.8100019308746862, "grad_norm": 1.193252682685852, "learning_rate": 7.424228226754702e-05, "loss": 0.6146, "step": 4195 }, { "epoch": 0.8101950183433095, "grad_norm": 0.9514247179031372, "learning_rate": 7.422754351663252e-05, "loss": 0.6266, "step": 4196 }, { "epoch": 0.8103881058119328, "grad_norm": 0.8700075149536133, "learning_rate": 7.421280201408236e-05, "loss": 0.6808, "step": 4197 }, { "epoch": 0.8105811932805561, "grad_norm": 1.0817556381225586, "learning_rate": 7.41980577615708e-05, "loss": 0.6103, "step": 4198 }, { "epoch": 0.8107742807491793, "grad_norm": 1.13398277759552, "learning_rate": 7.418331076077247e-05, "loss": 0.6104, "step": 4199 }, { "epoch": 0.8109673682178027, "grad_norm": 1.8760061264038086, "learning_rate": 7.416856101336219e-05, "loss": 0.5675, "step": 4200 }, { "epoch": 0.8111604556864259, "grad_norm": 1.3094433546066284, "learning_rate": 7.41538085210152e-05, "loss": 0.5737, "step": 4201 }, { "epoch": 0.8113535431550493, "grad_norm": 1.454486608505249, "learning_rate": 7.4139053285407e-05, "loss": 0.6549, "step": 4202 }, { "epoch": 0.8115466306236725, "grad_norm": 0.9407318234443665, "learning_rate": 7.412429530821339e-05, "loss": 0.6175, "step": 4203 }, { "epoch": 0.8117397180922958, "grad_norm": 1.1514557600021362, "learning_rate": 7.410953459111053e-05, "loss": 0.6657, "step": 4204 }, { "epoch": 0.8119328055609191, "grad_norm": 1.6922804117202759, "learning_rate": 7.409477113577485e-05, "loss": 0.6873, "step": 4205 }, { "epoch": 0.8121258930295424, "grad_norm": 8.476824760437012, "learning_rate": 7.408000494388312e-05, "loss": 0.709, "step": 4206 }, { "epoch": 0.8123189804981656, "grad_norm": 5.170529842376709, "learning_rate": 7.406523601711238e-05, "loss": 0.6438, "step": 4207 }, { "epoch": 0.812512067966789, "grad_norm": 1.1017802953720093, "learning_rate": 7.405046435714003e-05, "loss": 0.6769, "step": 4208 }, { "epoch": 0.8127051554354122, "grad_norm": 1.053098201751709, "learning_rate": 7.403568996564373e-05, "loss": 0.6325, "step": 4209 }, { "epoch": 0.8128982429040356, "grad_norm": 1.6024072170257568, "learning_rate": 7.402091284430152e-05, "loss": 0.6591, "step": 4210 }, { "epoch": 0.8130913303726588, "grad_norm": 0.9252497553825378, "learning_rate": 7.400613299479165e-05, "loss": 0.6344, "step": 4211 }, { "epoch": 0.8132844178412821, "grad_norm": 0.9503124952316284, "learning_rate": 7.39913504187928e-05, "loss": 0.6383, "step": 4212 }, { "epoch": 0.8134775053099054, "grad_norm": 7.787924289703369, "learning_rate": 7.397656511798386e-05, "loss": 0.6184, "step": 4213 }, { "epoch": 0.8136705927785287, "grad_norm": 1.7468377351760864, "learning_rate": 7.396177709404404e-05, "loss": 0.6391, "step": 4214 }, { "epoch": 0.8138636802471519, "grad_norm": 1.1006176471710205, "learning_rate": 7.394698634865294e-05, "loss": 0.6077, "step": 4215 }, { "epoch": 0.8140567677157753, "grad_norm": 0.894985556602478, "learning_rate": 7.393219288349039e-05, "loss": 0.6606, "step": 4216 }, { "epoch": 0.8142498551843985, "grad_norm": 0.7449870705604553, "learning_rate": 7.391739670023653e-05, "loss": 0.6308, "step": 4217 }, { "epoch": 0.8144429426530219, "grad_norm": 1.129438877105713, "learning_rate": 7.390259780057187e-05, "loss": 0.6139, "step": 4218 }, { "epoch": 0.8146360301216451, "grad_norm": 1.0022660493850708, "learning_rate": 7.388779618617719e-05, "loss": 0.6795, "step": 4219 }, { "epoch": 0.8148291175902684, "grad_norm": 0.8059614896774292, "learning_rate": 7.387299185873353e-05, "loss": 0.6948, "step": 4220 }, { "epoch": 0.8150222050588917, "grad_norm": 0.9791074991226196, "learning_rate": 7.385818481992234e-05, "loss": 0.5979, "step": 4221 }, { "epoch": 0.815215292527515, "grad_norm": 1.0105295181274414, "learning_rate": 7.384337507142531e-05, "loss": 0.5902, "step": 4222 }, { "epoch": 0.8154083799961382, "grad_norm": 1.7626299858093262, "learning_rate": 7.382856261492443e-05, "loss": 0.6734, "step": 4223 }, { "epoch": 0.8156014674647616, "grad_norm": 0.7694852948188782, "learning_rate": 7.381374745210205e-05, "loss": 0.6653, "step": 4224 }, { "epoch": 0.8157945549333848, "grad_norm": 1.2583411931991577, "learning_rate": 7.379892958464079e-05, "loss": 0.5845, "step": 4225 }, { "epoch": 0.8159876424020082, "grad_norm": 1.6832184791564941, "learning_rate": 7.378410901422356e-05, "loss": 0.6008, "step": 4226 }, { "epoch": 0.8161807298706314, "grad_norm": 1.0307952165603638, "learning_rate": 7.376928574253364e-05, "loss": 0.6288, "step": 4227 }, { "epoch": 0.8163738173392547, "grad_norm": 0.8632040023803711, "learning_rate": 7.375445977125456e-05, "loss": 0.6058, "step": 4228 }, { "epoch": 0.816566904807878, "grad_norm": 0.7486860156059265, "learning_rate": 7.373963110207016e-05, "loss": 0.6484, "step": 4229 }, { "epoch": 0.8167599922765012, "grad_norm": 1.0442121028900146, "learning_rate": 7.372479973666464e-05, "loss": 0.6411, "step": 4230 }, { "epoch": 0.8169530797451245, "grad_norm": 1.3822983503341675, "learning_rate": 7.370996567672242e-05, "loss": 0.6876, "step": 4231 }, { "epoch": 0.8171461672137478, "grad_norm": 1.1401559114456177, "learning_rate": 7.369512892392829e-05, "loss": 0.6266, "step": 4232 }, { "epoch": 0.8173392546823711, "grad_norm": 1.1163121461868286, "learning_rate": 7.368028947996737e-05, "loss": 0.6161, "step": 4233 }, { "epoch": 0.8175323421509944, "grad_norm": 0.9016106128692627, "learning_rate": 7.3665447346525e-05, "loss": 0.6323, "step": 4234 }, { "epoch": 0.8177254296196177, "grad_norm": 2.459825277328491, "learning_rate": 7.365060252528687e-05, "loss": 0.6266, "step": 4235 }, { "epoch": 0.8179185170882409, "grad_norm": 1.1177300214767456, "learning_rate": 7.3635755017939e-05, "loss": 0.6475, "step": 4236 }, { "epoch": 0.8181116045568643, "grad_norm": 1.1542332172393799, "learning_rate": 7.362090482616768e-05, "loss": 0.5996, "step": 4237 }, { "epoch": 0.8183046920254875, "grad_norm": 1.2895915508270264, "learning_rate": 7.36060519516595e-05, "loss": 0.6018, "step": 4238 }, { "epoch": 0.8184977794941108, "grad_norm": 0.9478473663330078, "learning_rate": 7.35911963961014e-05, "loss": 0.6651, "step": 4239 }, { "epoch": 0.8186908669627341, "grad_norm": 1.4732770919799805, "learning_rate": 7.357633816118059e-05, "loss": 0.5924, "step": 4240 }, { "epoch": 0.8188839544313574, "grad_norm": 0.9231606721878052, "learning_rate": 7.356147724858456e-05, "loss": 0.6141, "step": 4241 }, { "epoch": 0.8190770418999807, "grad_norm": 0.84211266040802, "learning_rate": 7.354661366000117e-05, "loss": 0.6764, "step": 4242 }, { "epoch": 0.819270129368604, "grad_norm": 1.1454524993896484, "learning_rate": 7.353174739711853e-05, "loss": 0.5871, "step": 4243 }, { "epoch": 0.8194632168372272, "grad_norm": 1.1682313680648804, "learning_rate": 7.351687846162508e-05, "loss": 0.6635, "step": 4244 }, { "epoch": 0.8196563043058506, "grad_norm": 1.0214905738830566, "learning_rate": 7.350200685520953e-05, "loss": 0.6288, "step": 4245 }, { "epoch": 0.8198493917744738, "grad_norm": 1.0768494606018066, "learning_rate": 7.348713257956093e-05, "loss": 0.5599, "step": 4246 }, { "epoch": 0.8200424792430971, "grad_norm": 0.6803908348083496, "learning_rate": 7.347225563636866e-05, "loss": 0.665, "step": 4247 }, { "epoch": 0.8202355667117204, "grad_norm": 0.6548538208007812, "learning_rate": 7.34573760273223e-05, "loss": 0.6589, "step": 4248 }, { "epoch": 0.8204286541803437, "grad_norm": 0.7705560922622681, "learning_rate": 7.344249375411183e-05, "loss": 0.635, "step": 4249 }, { "epoch": 0.820621741648967, "grad_norm": 0.6696896553039551, "learning_rate": 7.34276088184275e-05, "loss": 0.7024, "step": 4250 }, { "epoch": 0.8208148291175903, "grad_norm": 1.5779842138290405, "learning_rate": 7.341272122195987e-05, "loss": 0.6422, "step": 4251 }, { "epoch": 0.8210079165862135, "grad_norm": 0.8403317928314209, "learning_rate": 7.339783096639978e-05, "loss": 0.6563, "step": 4252 }, { "epoch": 0.8212010040548369, "grad_norm": 0.8491297364234924, "learning_rate": 7.338293805343839e-05, "loss": 0.7073, "step": 4253 }, { "epoch": 0.8213940915234601, "grad_norm": 0.8769031763076782, "learning_rate": 7.336804248476715e-05, "loss": 0.6298, "step": 4254 }, { "epoch": 0.8215871789920834, "grad_norm": 0.8021599650382996, "learning_rate": 7.335314426207782e-05, "loss": 0.6603, "step": 4255 }, { "epoch": 0.8217802664607067, "grad_norm": 0.8144459128379822, "learning_rate": 7.333824338706249e-05, "loss": 0.6495, "step": 4256 }, { "epoch": 0.82197335392933, "grad_norm": 0.6990933418273926, "learning_rate": 7.332333986141348e-05, "loss": 0.6403, "step": 4257 }, { "epoch": 0.8221664413979533, "grad_norm": 0.8438738584518433, "learning_rate": 7.330843368682348e-05, "loss": 0.6664, "step": 4258 }, { "epoch": 0.8223595288665766, "grad_norm": 0.6642364263534546, "learning_rate": 7.329352486498545e-05, "loss": 0.582, "step": 4259 }, { "epoch": 0.8225526163351998, "grad_norm": 1.1588314771652222, "learning_rate": 7.327861339759266e-05, "loss": 0.6437, "step": 4260 }, { "epoch": 0.8227457038038232, "grad_norm": 0.7102349996566772, "learning_rate": 7.326369928633865e-05, "loss": 0.6056, "step": 4261 }, { "epoch": 0.8229387912724464, "grad_norm": 1.6718538999557495, "learning_rate": 7.324878253291732e-05, "loss": 0.6178, "step": 4262 }, { "epoch": 0.8231318787410697, "grad_norm": 1.567360281944275, "learning_rate": 7.32338631390228e-05, "loss": 0.6607, "step": 4263 }, { "epoch": 0.823324966209693, "grad_norm": 1.1622729301452637, "learning_rate": 7.321894110634958e-05, "loss": 0.6865, "step": 4264 }, { "epoch": 0.8235180536783163, "grad_norm": 0.7485345602035522, "learning_rate": 7.320401643659243e-05, "loss": 0.628, "step": 4265 }, { "epoch": 0.8237111411469396, "grad_norm": 1.1327860355377197, "learning_rate": 7.318908913144637e-05, "loss": 0.6615, "step": 4266 }, { "epoch": 0.8239042286155629, "grad_norm": 1.2229646444320679, "learning_rate": 7.317415919260684e-05, "loss": 0.6664, "step": 4267 }, { "epoch": 0.8240973160841861, "grad_norm": 0.8891427516937256, "learning_rate": 7.315922662176944e-05, "loss": 0.6543, "step": 4268 }, { "epoch": 0.8242904035528095, "grad_norm": 0.8832783102989197, "learning_rate": 7.314429142063016e-05, "loss": 0.646, "step": 4269 }, { "epoch": 0.8244834910214327, "grad_norm": 0.7915849089622498, "learning_rate": 7.312935359088525e-05, "loss": 0.6366, "step": 4270 }, { "epoch": 0.824676578490056, "grad_norm": 0.8790382146835327, "learning_rate": 7.311441313423126e-05, "loss": 0.6418, "step": 4271 }, { "epoch": 0.8248696659586793, "grad_norm": 9.469175338745117, "learning_rate": 7.309947005236507e-05, "loss": 0.6694, "step": 4272 }, { "epoch": 0.8250627534273026, "grad_norm": 1.7259438037872314, "learning_rate": 7.308452434698384e-05, "loss": 0.6737, "step": 4273 }, { "epoch": 0.8252558408959259, "grad_norm": 1.1561840772628784, "learning_rate": 7.306957601978499e-05, "loss": 0.6577, "step": 4274 }, { "epoch": 0.8254489283645492, "grad_norm": 3.3004043102264404, "learning_rate": 7.30546250724663e-05, "loss": 0.7374, "step": 4275 }, { "epoch": 0.8256420158331724, "grad_norm": 0.8604854941368103, "learning_rate": 7.303967150672581e-05, "loss": 0.6634, "step": 4276 }, { "epoch": 0.8258351033017957, "grad_norm": 1.0053684711456299, "learning_rate": 7.302471532426186e-05, "loss": 0.651, "step": 4277 }, { "epoch": 0.826028190770419, "grad_norm": 0.7200989723205566, "learning_rate": 7.300975652677312e-05, "loss": 0.6242, "step": 4278 }, { "epoch": 0.8262212782390422, "grad_norm": 0.7454149723052979, "learning_rate": 7.29947951159585e-05, "loss": 0.6061, "step": 4279 }, { "epoch": 0.8264143657076656, "grad_norm": 5.3130388259887695, "learning_rate": 7.297983109351724e-05, "loss": 0.6027, "step": 4280 }, { "epoch": 0.8266074531762888, "grad_norm": 1.22083580493927, "learning_rate": 7.296486446114889e-05, "loss": 0.6403, "step": 4281 }, { "epoch": 0.8268005406449122, "grad_norm": 0.9923729300498962, "learning_rate": 7.294989522055328e-05, "loss": 0.6255, "step": 4282 }, { "epoch": 0.8269936281135354, "grad_norm": 0.7602053880691528, "learning_rate": 7.293492337343054e-05, "loss": 0.6391, "step": 4283 }, { "epoch": 0.8271867155821587, "grad_norm": 0.8653231859207153, "learning_rate": 7.291994892148106e-05, "loss": 0.6327, "step": 4284 }, { "epoch": 0.827379803050782, "grad_norm": 0.5426446199417114, "learning_rate": 7.290497186640561e-05, "loss": 0.5928, "step": 4285 }, { "epoch": 0.8275728905194053, "grad_norm": 0.9652832746505737, "learning_rate": 7.288999220990515e-05, "loss": 0.6536, "step": 4286 }, { "epoch": 0.8277659779880285, "grad_norm": 1.0913723707199097, "learning_rate": 7.287500995368104e-05, "loss": 0.6007, "step": 4287 }, { "epoch": 0.8279590654566519, "grad_norm": 1.8978625535964966, "learning_rate": 7.286002509943484e-05, "loss": 0.5995, "step": 4288 }, { "epoch": 0.8281521529252751, "grad_norm": 1.0506837368011475, "learning_rate": 7.284503764886849e-05, "loss": 0.6419, "step": 4289 }, { "epoch": 0.8283452403938985, "grad_norm": 0.7171085476875305, "learning_rate": 7.283004760368416e-05, "loss": 0.6432, "step": 4290 }, { "epoch": 0.8285383278625217, "grad_norm": 1.1503950357437134, "learning_rate": 7.281505496558434e-05, "loss": 0.6386, "step": 4291 }, { "epoch": 0.828731415331145, "grad_norm": 1.8098890781402588, "learning_rate": 7.28000597362718e-05, "loss": 0.6357, "step": 4292 }, { "epoch": 0.8289245027997683, "grad_norm": 1.2374626398086548, "learning_rate": 7.27850619174497e-05, "loss": 0.6298, "step": 4293 }, { "epoch": 0.8291175902683916, "grad_norm": 1.1359944343566895, "learning_rate": 7.27700615108213e-05, "loss": 0.651, "step": 4294 }, { "epoch": 0.8293106777370148, "grad_norm": 0.8425984382629395, "learning_rate": 7.275505851809032e-05, "loss": 0.6338, "step": 4295 }, { "epoch": 0.8295037652056382, "grad_norm": 1.179958701133728, "learning_rate": 7.274005294096074e-05, "loss": 0.7188, "step": 4296 }, { "epoch": 0.8296968526742614, "grad_norm": 0.8776713609695435, "learning_rate": 7.272504478113679e-05, "loss": 0.6145, "step": 4297 }, { "epoch": 0.8298899401428848, "grad_norm": 0.9107654094696045, "learning_rate": 7.2710034040323e-05, "loss": 0.6424, "step": 4298 }, { "epoch": 0.830083027611508, "grad_norm": 1.241227626800537, "learning_rate": 7.269502072022425e-05, "loss": 0.6847, "step": 4299 }, { "epoch": 0.8302761150801313, "grad_norm": 0.6416803598403931, "learning_rate": 7.268000482254564e-05, "loss": 0.6444, "step": 4300 }, { "epoch": 0.8304692025487546, "grad_norm": 2.3182389736175537, "learning_rate": 7.26649863489926e-05, "loss": 0.6647, "step": 4301 }, { "epoch": 0.8306622900173779, "grad_norm": 0.9786497354507446, "learning_rate": 7.264996530127087e-05, "loss": 0.6535, "step": 4302 }, { "epoch": 0.8308553774860011, "grad_norm": 2.8997015953063965, "learning_rate": 7.263494168108646e-05, "loss": 0.6045, "step": 4303 }, { "epoch": 0.8310484649546245, "grad_norm": 1.5264067649841309, "learning_rate": 7.261991549014562e-05, "loss": 0.5717, "step": 4304 }, { "epoch": 0.8312415524232477, "grad_norm": 0.8377323746681213, "learning_rate": 7.260488673015501e-05, "loss": 0.7192, "step": 4305 }, { "epoch": 0.831434639891871, "grad_norm": 1.1852456331253052, "learning_rate": 7.25898554028215e-05, "loss": 0.6407, "step": 4306 }, { "epoch": 0.8316277273604943, "grad_norm": 1.2549487352371216, "learning_rate": 7.257482150985225e-05, "loss": 0.6796, "step": 4307 }, { "epoch": 0.8318208148291176, "grad_norm": 1.6136548519134521, "learning_rate": 7.255978505295475e-05, "loss": 0.6982, "step": 4308 }, { "epoch": 0.8320139022977409, "grad_norm": 0.9044841527938843, "learning_rate": 7.254474603383673e-05, "loss": 0.6633, "step": 4309 }, { "epoch": 0.8322069897663642, "grad_norm": 0.7963463664054871, "learning_rate": 7.252970445420628e-05, "loss": 0.6276, "step": 4310 }, { "epoch": 0.8324000772349874, "grad_norm": 0.8213988542556763, "learning_rate": 7.251466031577172e-05, "loss": 0.6498, "step": 4311 }, { "epoch": 0.8325931647036108, "grad_norm": 1.2235333919525146, "learning_rate": 7.249961362024168e-05, "loss": 0.6762, "step": 4312 }, { "epoch": 0.832786252172234, "grad_norm": 0.7057346105575562, "learning_rate": 7.24845643693251e-05, "loss": 0.6565, "step": 4313 }, { "epoch": 0.8329793396408574, "grad_norm": 0.8755031228065491, "learning_rate": 7.24695125647312e-05, "loss": 0.7325, "step": 4314 }, { "epoch": 0.8331724271094806, "grad_norm": 0.735218346118927, "learning_rate": 7.245445820816945e-05, "loss": 0.6589, "step": 4315 }, { "epoch": 0.8333655145781039, "grad_norm": 1.3290690183639526, "learning_rate": 7.243940130134967e-05, "loss": 0.6734, "step": 4316 }, { "epoch": 0.8335586020467272, "grad_norm": 1.5112303495407104, "learning_rate": 7.242434184598192e-05, "loss": 0.6579, "step": 4317 }, { "epoch": 0.8337516895153505, "grad_norm": 1.0110522508621216, "learning_rate": 7.240927984377663e-05, "loss": 0.6117, "step": 4318 }, { "epoch": 0.8339447769839737, "grad_norm": 1.092448115348816, "learning_rate": 7.23942152964444e-05, "loss": 0.5939, "step": 4319 }, { "epoch": 0.8341378644525971, "grad_norm": 0.6901905536651611, "learning_rate": 7.237914820569619e-05, "loss": 0.6371, "step": 4320 }, { "epoch": 0.8343309519212203, "grad_norm": 1.9293980598449707, "learning_rate": 7.236407857324328e-05, "loss": 0.6158, "step": 4321 }, { "epoch": 0.8345240393898437, "grad_norm": 1.5615811347961426, "learning_rate": 7.234900640079716e-05, "loss": 0.6595, "step": 4322 }, { "epoch": 0.8347171268584669, "grad_norm": 0.9930275678634644, "learning_rate": 7.233393169006964e-05, "loss": 0.6054, "step": 4323 }, { "epoch": 0.8349102143270901, "grad_norm": 0.914936900138855, "learning_rate": 7.231885444277288e-05, "loss": 0.6493, "step": 4324 }, { "epoch": 0.8351033017957135, "grad_norm": 1.1111211776733398, "learning_rate": 7.230377466061922e-05, "loss": 0.6428, "step": 4325 }, { "epoch": 0.8352963892643367, "grad_norm": 0.6160704493522644, "learning_rate": 7.228869234532137e-05, "loss": 0.6738, "step": 4326 }, { "epoch": 0.83548947673296, "grad_norm": 0.9360513687133789, "learning_rate": 7.227360749859226e-05, "loss": 0.5816, "step": 4327 }, { "epoch": 0.8356825642015833, "grad_norm": 1.552639126777649, "learning_rate": 7.225852012214522e-05, "loss": 0.6346, "step": 4328 }, { "epoch": 0.8358756516702066, "grad_norm": 0.7459601759910583, "learning_rate": 7.224343021769372e-05, "loss": 0.7006, "step": 4329 }, { "epoch": 0.8360687391388298, "grad_norm": 0.797701895236969, "learning_rate": 7.222833778695164e-05, "loss": 0.6318, "step": 4330 }, { "epoch": 0.8362618266074532, "grad_norm": 1.286232590675354, "learning_rate": 7.221324283163306e-05, "loss": 0.6873, "step": 4331 }, { "epoch": 0.8364549140760764, "grad_norm": 0.8450058102607727, "learning_rate": 7.219814535345243e-05, "loss": 0.6342, "step": 4332 }, { "epoch": 0.8366480015446998, "grad_norm": 1.058556079864502, "learning_rate": 7.21830453541244e-05, "loss": 0.6896, "step": 4333 }, { "epoch": 0.836841089013323, "grad_norm": 0.5890207886695862, "learning_rate": 7.216794283536397e-05, "loss": 0.6039, "step": 4334 }, { "epoch": 0.8370341764819463, "grad_norm": 0.7788511514663696, "learning_rate": 7.215283779888639e-05, "loss": 0.642, "step": 4335 }, { "epoch": 0.8372272639505696, "grad_norm": 0.9195616245269775, "learning_rate": 7.213773024640722e-05, "loss": 0.6717, "step": 4336 }, { "epoch": 0.8374203514191929, "grad_norm": 0.650999128818512, "learning_rate": 7.212262017964229e-05, "loss": 0.7043, "step": 4337 }, { "epoch": 0.8376134388878161, "grad_norm": 0.59653639793396, "learning_rate": 7.210750760030771e-05, "loss": 0.6589, "step": 4338 }, { "epoch": 0.8378065263564395, "grad_norm": 1.0069973468780518, "learning_rate": 7.209239251011993e-05, "loss": 0.5628, "step": 4339 }, { "epoch": 0.8379996138250627, "grad_norm": 0.8241732120513916, "learning_rate": 7.20772749107956e-05, "loss": 0.6287, "step": 4340 }, { "epoch": 0.8381927012936861, "grad_norm": 0.6710103750228882, "learning_rate": 7.20621548040517e-05, "loss": 0.6243, "step": 4341 }, { "epoch": 0.8383857887623093, "grad_norm": 0.8789098262786865, "learning_rate": 7.204703219160551e-05, "loss": 0.6592, "step": 4342 }, { "epoch": 0.8385788762309326, "grad_norm": 0.7646051049232483, "learning_rate": 7.203190707517456e-05, "loss": 0.6486, "step": 4343 }, { "epoch": 0.8387719636995559, "grad_norm": 0.6869224905967712, "learning_rate": 7.20167794564767e-05, "loss": 0.5747, "step": 4344 }, { "epoch": 0.8389650511681792, "grad_norm": 0.7420974969863892, "learning_rate": 7.200164933723e-05, "loss": 0.5996, "step": 4345 }, { "epoch": 0.8391581386368024, "grad_norm": 0.7918408513069153, "learning_rate": 7.198651671915292e-05, "loss": 0.682, "step": 4346 }, { "epoch": 0.8393512261054258, "grad_norm": 0.7411250472068787, "learning_rate": 7.197138160396409e-05, "loss": 0.5629, "step": 4347 }, { "epoch": 0.839544313574049, "grad_norm": 0.6898320913314819, "learning_rate": 7.19562439933825e-05, "loss": 0.6723, "step": 4348 }, { "epoch": 0.8397374010426724, "grad_norm": 0.6888036131858826, "learning_rate": 7.194110388912741e-05, "loss": 0.7077, "step": 4349 }, { "epoch": 0.8399304885112956, "grad_norm": 0.6852665543556213, "learning_rate": 7.192596129291834e-05, "loss": 0.6511, "step": 4350 }, { "epoch": 0.8401235759799189, "grad_norm": 0.7981736063957214, "learning_rate": 7.19108162064751e-05, "loss": 0.6427, "step": 4351 }, { "epoch": 0.8403166634485422, "grad_norm": 0.6735328435897827, "learning_rate": 7.189566863151778e-05, "loss": 0.5812, "step": 4352 }, { "epoch": 0.8405097509171655, "grad_norm": 0.8564249873161316, "learning_rate": 7.18805185697668e-05, "loss": 0.6847, "step": 4353 }, { "epoch": 0.8407028383857887, "grad_norm": 1.1159616708755493, "learning_rate": 7.186536602294278e-05, "loss": 0.6843, "step": 4354 }, { "epoch": 0.8408959258544121, "grad_norm": 0.7022541761398315, "learning_rate": 7.185021099276667e-05, "loss": 0.6973, "step": 4355 }, { "epoch": 0.8410890133230353, "grad_norm": 1.5633482933044434, "learning_rate": 7.183505348095974e-05, "loss": 0.6425, "step": 4356 }, { "epoch": 0.8412821007916587, "grad_norm": 1.6789151430130005, "learning_rate": 7.181989348924346e-05, "loss": 0.7094, "step": 4357 }, { "epoch": 0.8414751882602819, "grad_norm": 0.9996597170829773, "learning_rate": 7.180473101933963e-05, "loss": 0.6218, "step": 4358 }, { "epoch": 0.8416682757289052, "grad_norm": 1.9537867307662964, "learning_rate": 7.178956607297033e-05, "loss": 0.6831, "step": 4359 }, { "epoch": 0.8418613631975285, "grad_norm": 0.9766444563865662, "learning_rate": 7.177439865185791e-05, "loss": 0.6404, "step": 4360 }, { "epoch": 0.8420544506661518, "grad_norm": 0.9376831650733948, "learning_rate": 7.175922875772499e-05, "loss": 0.6651, "step": 4361 }, { "epoch": 0.842247538134775, "grad_norm": 1.3447186946868896, "learning_rate": 7.174405639229452e-05, "loss": 0.6625, "step": 4362 }, { "epoch": 0.8424406256033984, "grad_norm": 1.0226131677627563, "learning_rate": 7.172888155728968e-05, "loss": 0.6183, "step": 4363 }, { "epoch": 0.8426337130720216, "grad_norm": 1.4361892938613892, "learning_rate": 7.171370425443393e-05, "loss": 0.7265, "step": 4364 }, { "epoch": 0.842826800540645, "grad_norm": 0.8719564080238342, "learning_rate": 7.169852448545103e-05, "loss": 0.6381, "step": 4365 }, { "epoch": 0.8430198880092682, "grad_norm": 1.0629814863204956, "learning_rate": 7.168334225206504e-05, "loss": 0.6303, "step": 4366 }, { "epoch": 0.8432129754778915, "grad_norm": 0.7412570118904114, "learning_rate": 7.166815755600028e-05, "loss": 0.7082, "step": 4367 }, { "epoch": 0.8434060629465148, "grad_norm": 0.8056556582450867, "learning_rate": 7.165297039898133e-05, "loss": 0.6343, "step": 4368 }, { "epoch": 0.8435991504151381, "grad_norm": 1.5800800323486328, "learning_rate": 7.163778078273304e-05, "loss": 0.6057, "step": 4369 }, { "epoch": 0.8437922378837613, "grad_norm": 1.8287791013717651, "learning_rate": 7.162258870898063e-05, "loss": 0.6724, "step": 4370 }, { "epoch": 0.8439853253523847, "grad_norm": 1.0139793157577515, "learning_rate": 7.160739417944949e-05, "loss": 0.6522, "step": 4371 }, { "epoch": 0.8441784128210079, "grad_norm": 0.9631819725036621, "learning_rate": 7.159219719586534e-05, "loss": 0.6448, "step": 4372 }, { "epoch": 0.8443715002896311, "grad_norm": 0.8932510614395142, "learning_rate": 7.157699775995419e-05, "loss": 0.6667, "step": 4373 }, { "epoch": 0.8445645877582545, "grad_norm": 0.684370756149292, "learning_rate": 7.156179587344229e-05, "loss": 0.5679, "step": 4374 }, { "epoch": 0.8447576752268777, "grad_norm": 1.7265098094940186, "learning_rate": 7.154659153805619e-05, "loss": 0.6714, "step": 4375 }, { "epoch": 0.8449507626955011, "grad_norm": 0.9909793734550476, "learning_rate": 7.153138475552273e-05, "loss": 0.6246, "step": 4376 }, { "epoch": 0.8451438501641243, "grad_norm": 0.6591453552246094, "learning_rate": 7.151617552756901e-05, "loss": 0.6196, "step": 4377 }, { "epoch": 0.8453369376327476, "grad_norm": 0.625056266784668, "learning_rate": 7.150096385592243e-05, "loss": 0.5976, "step": 4378 }, { "epoch": 0.8455300251013709, "grad_norm": 0.8902250528335571, "learning_rate": 7.148574974231063e-05, "loss": 0.6169, "step": 4379 }, { "epoch": 0.8457231125699942, "grad_norm": 1.6683087348937988, "learning_rate": 7.147053318846154e-05, "loss": 0.5823, "step": 4380 }, { "epoch": 0.8459162000386174, "grad_norm": 1.2465808391571045, "learning_rate": 7.14553141961034e-05, "loss": 0.6376, "step": 4381 }, { "epoch": 0.8461092875072408, "grad_norm": 1.3557692766189575, "learning_rate": 7.144009276696469e-05, "loss": 0.5895, "step": 4382 }, { "epoch": 0.846302374975864, "grad_norm": 0.6865507364273071, "learning_rate": 7.142486890277418e-05, "loss": 0.5719, "step": 4383 }, { "epoch": 0.8464954624444874, "grad_norm": 0.826245129108429, "learning_rate": 7.140964260526091e-05, "loss": 0.6611, "step": 4384 }, { "epoch": 0.8466885499131106, "grad_norm": 0.7080128192901611, "learning_rate": 7.139441387615422e-05, "loss": 0.6635, "step": 4385 }, { "epoch": 0.8468816373817339, "grad_norm": 0.6357041001319885, "learning_rate": 7.137918271718368e-05, "loss": 0.6466, "step": 4386 }, { "epoch": 0.8470747248503572, "grad_norm": 1.3502568006515503, "learning_rate": 7.136394913007918e-05, "loss": 0.652, "step": 4387 }, { "epoch": 0.8472678123189805, "grad_norm": 0.9207178354263306, "learning_rate": 7.13487131165709e-05, "loss": 0.6691, "step": 4388 }, { "epoch": 0.8474608997876037, "grad_norm": 2.3739895820617676, "learning_rate": 7.13334746783892e-05, "loss": 0.6283, "step": 4389 }, { "epoch": 0.8476539872562271, "grad_norm": 0.7967591881752014, "learning_rate": 7.131823381726482e-05, "loss": 0.6774, "step": 4390 }, { "epoch": 0.8478470747248503, "grad_norm": 0.8731507658958435, "learning_rate": 7.130299053492875e-05, "loss": 0.7029, "step": 4391 }, { "epoch": 0.8480401621934737, "grad_norm": 1.1818665266036987, "learning_rate": 7.128774483311222e-05, "loss": 0.7026, "step": 4392 }, { "epoch": 0.8482332496620969, "grad_norm": 0.5726007223129272, "learning_rate": 7.127249671354675e-05, "loss": 0.6382, "step": 4393 }, { "epoch": 0.8484263371307202, "grad_norm": 0.756466269493103, "learning_rate": 7.125724617796415e-05, "loss": 0.5709, "step": 4394 }, { "epoch": 0.8486194245993435, "grad_norm": 1.0987968444824219, "learning_rate": 7.12419932280965e-05, "loss": 0.6392, "step": 4395 }, { "epoch": 0.8488125120679668, "grad_norm": 0.7690591216087341, "learning_rate": 7.122673786567614e-05, "loss": 0.6352, "step": 4396 }, { "epoch": 0.84900559953659, "grad_norm": 0.664874792098999, "learning_rate": 7.121148009243569e-05, "loss": 0.6846, "step": 4397 }, { "epoch": 0.8491986870052134, "grad_norm": 0.7015582323074341, "learning_rate": 7.119621991010806e-05, "loss": 0.6235, "step": 4398 }, { "epoch": 0.8493917744738366, "grad_norm": 0.9646469354629517, "learning_rate": 7.118095732042643e-05, "loss": 0.67, "step": 4399 }, { "epoch": 0.84958486194246, "grad_norm": 0.6109640598297119, "learning_rate": 7.116569232512419e-05, "loss": 0.5954, "step": 4400 }, { "epoch": 0.8497779494110832, "grad_norm": 0.6579117178916931, "learning_rate": 7.11504249259351e-05, "loss": 0.5903, "step": 4401 }, { "epoch": 0.8499710368797065, "grad_norm": 2.078239917755127, "learning_rate": 7.113515512459317e-05, "loss": 0.6442, "step": 4402 }, { "epoch": 0.8501641243483298, "grad_norm": 0.8510857224464417, "learning_rate": 7.111988292283263e-05, "loss": 0.6946, "step": 4403 }, { "epoch": 0.8503572118169531, "grad_norm": 0.5940137505531311, "learning_rate": 7.1104608322388e-05, "loss": 0.7145, "step": 4404 }, { "epoch": 0.8505502992855763, "grad_norm": 1.1646240949630737, "learning_rate": 7.108933132499413e-05, "loss": 0.594, "step": 4405 }, { "epoch": 0.8507433867541997, "grad_norm": 0.9737876653671265, "learning_rate": 7.107405193238609e-05, "loss": 0.6239, "step": 4406 }, { "epoch": 0.8509364742228229, "grad_norm": 0.9205219745635986, "learning_rate": 7.10587701462992e-05, "loss": 0.6003, "step": 4407 }, { "epoch": 0.8511295616914463, "grad_norm": 4.288562774658203, "learning_rate": 7.104348596846912e-05, "loss": 0.6425, "step": 4408 }, { "epoch": 0.8513226491600695, "grad_norm": 0.5832407474517822, "learning_rate": 7.102819940063173e-05, "loss": 0.6251, "step": 4409 }, { "epoch": 0.8515157366286928, "grad_norm": 1.3091590404510498, "learning_rate": 7.101291044452319e-05, "loss": 0.6657, "step": 4410 }, { "epoch": 0.8517088240973161, "grad_norm": 0.6858434081077576, "learning_rate": 7.099761910187996e-05, "loss": 0.6594, "step": 4411 }, { "epoch": 0.8519019115659394, "grad_norm": 0.5370792150497437, "learning_rate": 7.098232537443874e-05, "loss": 0.6629, "step": 4412 }, { "epoch": 0.8520949990345627, "grad_norm": 4.058699131011963, "learning_rate": 7.096702926393653e-05, "loss": 0.6879, "step": 4413 }, { "epoch": 0.852288086503186, "grad_norm": 0.7721856236457825, "learning_rate": 7.095173077211051e-05, "loss": 0.6422, "step": 4414 }, { "epoch": 0.8524811739718092, "grad_norm": 0.5507387518882751, "learning_rate": 7.093642990069826e-05, "loss": 0.6764, "step": 4415 }, { "epoch": 0.8526742614404326, "grad_norm": 0.5686036944389343, "learning_rate": 7.09211266514376e-05, "loss": 0.6633, "step": 4416 }, { "epoch": 0.8528673489090558, "grad_norm": 0.8120847344398499, "learning_rate": 7.090582102606652e-05, "loss": 0.6162, "step": 4417 }, { "epoch": 0.8530604363776791, "grad_norm": 0.5894117951393127, "learning_rate": 7.089051302632339e-05, "loss": 0.5944, "step": 4418 }, { "epoch": 0.8532535238463024, "grad_norm": 1.4176312685012817, "learning_rate": 7.087520265394682e-05, "loss": 0.6057, "step": 4419 }, { "epoch": 0.8534466113149256, "grad_norm": 0.6845067143440247, "learning_rate": 7.085988991067566e-05, "loss": 0.6035, "step": 4420 }, { "epoch": 0.853639698783549, "grad_norm": 0.6220464110374451, "learning_rate": 7.084457479824904e-05, "loss": 0.7148, "step": 4421 }, { "epoch": 0.8538327862521722, "grad_norm": 0.6862390637397766, "learning_rate": 7.082925731840642e-05, "loss": 0.5963, "step": 4422 }, { "epoch": 0.8540258737207955, "grad_norm": 0.6421427130699158, "learning_rate": 7.08139374728874e-05, "loss": 0.5838, "step": 4423 }, { "epoch": 0.8542189611894188, "grad_norm": 0.906126081943512, "learning_rate": 7.079861526343201e-05, "loss": 0.6842, "step": 4424 }, { "epoch": 0.8544120486580421, "grad_norm": 1.4501292705535889, "learning_rate": 7.078329069178042e-05, "loss": 0.6345, "step": 4425 }, { "epoch": 0.8546051361266653, "grad_norm": 1.2065455913543701, "learning_rate": 7.076796375967311e-05, "loss": 0.5964, "step": 4426 }, { "epoch": 0.8547982235952887, "grad_norm": 0.638151228427887, "learning_rate": 7.075263446885085e-05, "loss": 0.6921, "step": 4427 }, { "epoch": 0.8549913110639119, "grad_norm": 1.8190062046051025, "learning_rate": 7.073730282105465e-05, "loss": 0.6823, "step": 4428 }, { "epoch": 0.8551843985325353, "grad_norm": 1.0062735080718994, "learning_rate": 7.072196881802578e-05, "loss": 0.6335, "step": 4429 }, { "epoch": 0.8553774860011585, "grad_norm": 0.5052826404571533, "learning_rate": 7.070663246150581e-05, "loss": 0.6211, "step": 4430 }, { "epoch": 0.8555705734697818, "grad_norm": 1.8220947980880737, "learning_rate": 7.069129375323658e-05, "loss": 0.6673, "step": 4431 }, { "epoch": 0.8557636609384051, "grad_norm": 0.6861618161201477, "learning_rate": 7.067595269496016e-05, "loss": 0.6552, "step": 4432 }, { "epoch": 0.8559567484070284, "grad_norm": 0.6241463422775269, "learning_rate": 7.066060928841892e-05, "loss": 0.6088, "step": 4433 }, { "epoch": 0.8561498358756516, "grad_norm": 1.1497666835784912, "learning_rate": 7.064526353535546e-05, "loss": 0.6732, "step": 4434 }, { "epoch": 0.856342923344275, "grad_norm": 0.9654750823974609, "learning_rate": 7.062991543751268e-05, "loss": 0.6607, "step": 4435 }, { "epoch": 0.8565360108128982, "grad_norm": 2.2296886444091797, "learning_rate": 7.061456499663373e-05, "loss": 0.6001, "step": 4436 }, { "epoch": 0.8567290982815216, "grad_norm": 0.5282419323921204, "learning_rate": 7.059921221446205e-05, "loss": 0.6842, "step": 4437 }, { "epoch": 0.8569221857501448, "grad_norm": 0.6090461015701294, "learning_rate": 7.05838570927413e-05, "loss": 0.6587, "step": 4438 }, { "epoch": 0.8571152732187681, "grad_norm": 0.7584561705589294, "learning_rate": 7.056849963321546e-05, "loss": 0.6485, "step": 4439 }, { "epoch": 0.8573083606873914, "grad_norm": 0.6078665256500244, "learning_rate": 7.055313983762873e-05, "loss": 0.661, "step": 4440 }, { "epoch": 0.8575014481560147, "grad_norm": 0.8394724130630493, "learning_rate": 7.05377777077256e-05, "loss": 0.5793, "step": 4441 }, { "epoch": 0.8576945356246379, "grad_norm": 0.7161808609962463, "learning_rate": 7.052241324525083e-05, "loss": 0.6411, "step": 4442 }, { "epoch": 0.8578876230932613, "grad_norm": 0.40860962867736816, "learning_rate": 7.05070464519494e-05, "loss": 0.5821, "step": 4443 }, { "epoch": 0.8580807105618845, "grad_norm": 0.562476634979248, "learning_rate": 7.049167732956663e-05, "loss": 0.5761, "step": 4444 }, { "epoch": 0.8582737980305079, "grad_norm": 0.6394474506378174, "learning_rate": 7.047630587984806e-05, "loss": 0.6331, "step": 4445 }, { "epoch": 0.8584668854991311, "grad_norm": 0.6832359433174133, "learning_rate": 7.046093210453945e-05, "loss": 0.6824, "step": 4446 }, { "epoch": 0.8586599729677544, "grad_norm": 0.6969436407089233, "learning_rate": 7.044555600538692e-05, "loss": 0.6149, "step": 4447 }, { "epoch": 0.8588530604363777, "grad_norm": 0.8989633917808533, "learning_rate": 7.04301775841368e-05, "loss": 0.611, "step": 4448 }, { "epoch": 0.859046147905001, "grad_norm": 0.8514688611030579, "learning_rate": 7.041479684253567e-05, "loss": 0.6455, "step": 4449 }, { "epoch": 0.8592392353736242, "grad_norm": 0.648291289806366, "learning_rate": 7.039941378233041e-05, "loss": 0.5634, "step": 4450 }, { "epoch": 0.8594323228422476, "grad_norm": 0.6688785552978516, "learning_rate": 7.038402840526814e-05, "loss": 0.6673, "step": 4451 }, { "epoch": 0.8596254103108708, "grad_norm": 1.5229767560958862, "learning_rate": 7.036864071309624e-05, "loss": 0.6249, "step": 4452 }, { "epoch": 0.8598184977794942, "grad_norm": 0.5260521769523621, "learning_rate": 7.035325070756237e-05, "loss": 0.6325, "step": 4453 }, { "epoch": 0.8600115852481174, "grad_norm": 0.6572993993759155, "learning_rate": 7.033785839041444e-05, "loss": 0.635, "step": 4454 }, { "epoch": 0.8602046727167407, "grad_norm": 0.8375421762466431, "learning_rate": 7.032246376340064e-05, "loss": 0.6114, "step": 4455 }, { "epoch": 0.860397760185364, "grad_norm": 0.555708646774292, "learning_rate": 7.030706682826942e-05, "loss": 0.6116, "step": 4456 }, { "epoch": 0.8605908476539873, "grad_norm": 0.6854230761528015, "learning_rate": 7.029166758676944e-05, "loss": 0.6143, "step": 4457 }, { "epoch": 0.8607839351226105, "grad_norm": 0.7899404764175415, "learning_rate": 7.027626604064969e-05, "loss": 0.6012, "step": 4458 }, { "epoch": 0.8609770225912339, "grad_norm": 1.0170526504516602, "learning_rate": 7.026086219165941e-05, "loss": 0.6268, "step": 4459 }, { "epoch": 0.8611701100598571, "grad_norm": 0.561224639415741, "learning_rate": 7.024545604154806e-05, "loss": 0.6689, "step": 4460 }, { "epoch": 0.8613631975284805, "grad_norm": 0.4896526038646698, "learning_rate": 7.02300475920654e-05, "loss": 0.6565, "step": 4461 }, { "epoch": 0.8615562849971037, "grad_norm": 0.5838367938995361, "learning_rate": 7.021463684496145e-05, "loss": 0.6779, "step": 4462 }, { "epoch": 0.861749372465727, "grad_norm": 1.0824581384658813, "learning_rate": 7.019922380198647e-05, "loss": 0.6152, "step": 4463 }, { "epoch": 0.8619424599343503, "grad_norm": 0.5583774447441101, "learning_rate": 7.0183808464891e-05, "loss": 0.6889, "step": 4464 }, { "epoch": 0.8621355474029736, "grad_norm": 1.8096314668655396, "learning_rate": 7.016839083542581e-05, "loss": 0.5965, "step": 4465 }, { "epoch": 0.8623286348715968, "grad_norm": 0.708705484867096, "learning_rate": 7.015297091534198e-05, "loss": 0.5686, "step": 4466 }, { "epoch": 0.8625217223402201, "grad_norm": 0.8174300789833069, "learning_rate": 7.01375487063908e-05, "loss": 0.6192, "step": 4467 }, { "epoch": 0.8627148098088434, "grad_norm": 0.8981893062591553, "learning_rate": 7.012212421032387e-05, "loss": 0.6094, "step": 4468 }, { "epoch": 0.8629078972774666, "grad_norm": 0.6748481392860413, "learning_rate": 7.010669742889298e-05, "loss": 0.661, "step": 4469 }, { "epoch": 0.86310098474609, "grad_norm": 0.5539877414703369, "learning_rate": 7.009126836385026e-05, "loss": 0.6793, "step": 4470 }, { "epoch": 0.8632940722147132, "grad_norm": 0.6438637375831604, "learning_rate": 7.007583701694806e-05, "loss": 0.6977, "step": 4471 }, { "epoch": 0.8634871596833366, "grad_norm": 0.5274230241775513, "learning_rate": 7.006040338993896e-05, "loss": 0.6493, "step": 4472 }, { "epoch": 0.8636802471519598, "grad_norm": 1.1320797204971313, "learning_rate": 7.004496748457585e-05, "loss": 0.6639, "step": 4473 }, { "epoch": 0.8638733346205831, "grad_norm": 1.742615818977356, "learning_rate": 7.002952930261186e-05, "loss": 0.6041, "step": 4474 }, { "epoch": 0.8640664220892064, "grad_norm": 0.5839645266532898, "learning_rate": 7.001408884580036e-05, "loss": 0.639, "step": 4475 }, { "epoch": 0.8642595095578297, "grad_norm": 0.7072002291679382, "learning_rate": 6.999864611589503e-05, "loss": 0.6183, "step": 4476 }, { "epoch": 0.8644525970264529, "grad_norm": 1.0309823751449585, "learning_rate": 6.998320111464974e-05, "loss": 0.6691, "step": 4477 }, { "epoch": 0.8646456844950763, "grad_norm": 0.6220638751983643, "learning_rate": 6.996775384381866e-05, "loss": 0.6149, "step": 4478 }, { "epoch": 0.8648387719636995, "grad_norm": 1.1891947984695435, "learning_rate": 6.99523043051562e-05, "loss": 0.6148, "step": 4479 }, { "epoch": 0.8650318594323229, "grad_norm": 0.7138386964797974, "learning_rate": 6.993685250041708e-05, "loss": 0.6871, "step": 4480 }, { "epoch": 0.8652249469009461, "grad_norm": 0.777233898639679, "learning_rate": 6.992139843135617e-05, "loss": 0.6531, "step": 4481 }, { "epoch": 0.8654180343695694, "grad_norm": 1.1398365497589111, "learning_rate": 6.990594209972869e-05, "loss": 0.6861, "step": 4482 }, { "epoch": 0.8656111218381927, "grad_norm": 0.6007878184318542, "learning_rate": 6.989048350729011e-05, "loss": 0.605, "step": 4483 }, { "epoch": 0.865804209306816, "grad_norm": 0.7482002973556519, "learning_rate": 6.987502265579611e-05, "loss": 0.6661, "step": 4484 }, { "epoch": 0.8659972967754392, "grad_norm": 0.8254035115242004, "learning_rate": 6.985955954700265e-05, "loss": 0.6085, "step": 4485 }, { "epoch": 0.8661903842440626, "grad_norm": 0.8873386383056641, "learning_rate": 6.984409418266596e-05, "loss": 0.6611, "step": 4486 }, { "epoch": 0.8663834717126858, "grad_norm": 0.41052666306495667, "learning_rate": 6.98286265645425e-05, "loss": 0.6407, "step": 4487 }, { "epoch": 0.8665765591813092, "grad_norm": 0.6478103995323181, "learning_rate": 6.981315669438901e-05, "loss": 0.6351, "step": 4488 }, { "epoch": 0.8667696466499324, "grad_norm": 0.5919353365898132, "learning_rate": 6.979768457396247e-05, "loss": 0.6299, "step": 4489 }, { "epoch": 0.8669627341185557, "grad_norm": 0.49171769618988037, "learning_rate": 6.978221020502014e-05, "loss": 0.6136, "step": 4490 }, { "epoch": 0.867155821587179, "grad_norm": 2.1982851028442383, "learning_rate": 6.97667335893195e-05, "loss": 0.6114, "step": 4491 }, { "epoch": 0.8673489090558023, "grad_norm": 0.7241982817649841, "learning_rate": 6.975125472861829e-05, "loss": 0.6667, "step": 4492 }, { "epoch": 0.8675419965244255, "grad_norm": 0.6139934062957764, "learning_rate": 6.973577362467453e-05, "loss": 0.6248, "step": 4493 }, { "epoch": 0.8677350839930489, "grad_norm": 0.44683992862701416, "learning_rate": 6.97202902792465e-05, "loss": 0.7269, "step": 4494 }, { "epoch": 0.8679281714616721, "grad_norm": 2.691729784011841, "learning_rate": 6.970480469409267e-05, "loss": 0.6126, "step": 4495 }, { "epoch": 0.8681212589302955, "grad_norm": 0.4577072858810425, "learning_rate": 6.968931687097186e-05, "loss": 0.6219, "step": 4496 }, { "epoch": 0.8683143463989187, "grad_norm": 0.6380960941314697, "learning_rate": 6.967382681164306e-05, "loss": 0.6122, "step": 4497 }, { "epoch": 0.868507433867542, "grad_norm": 0.6487831473350525, "learning_rate": 6.965833451786556e-05, "loss": 0.6549, "step": 4498 }, { "epoch": 0.8687005213361653, "grad_norm": 0.512052595615387, "learning_rate": 6.964283999139892e-05, "loss": 0.5972, "step": 4499 }, { "epoch": 0.8688936088047886, "grad_norm": 0.6446790099143982, "learning_rate": 6.962734323400287e-05, "loss": 0.6135, "step": 4500 }, { "epoch": 0.8688936088047886, "eval_loss": 0.6810309886932373, "eval_runtime": 49.7628, "eval_samples_per_second": 13.343, "eval_steps_per_second": 0.422, "step": 4500 }, { "epoch": 0.8690866962734118, "grad_norm": 1.141732931137085, "learning_rate": 6.961184424743748e-05, "loss": 0.6256, "step": 4501 }, { "epoch": 0.8692797837420352, "grad_norm": 0.5211224555969238, "learning_rate": 6.959634303346305e-05, "loss": 0.6911, "step": 4502 }, { "epoch": 0.8694728712106584, "grad_norm": 1.584612488746643, "learning_rate": 6.958083959384012e-05, "loss": 0.6233, "step": 4503 }, { "epoch": 0.8696659586792818, "grad_norm": 0.9083097577095032, "learning_rate": 6.956533393032946e-05, "loss": 0.6581, "step": 4504 }, { "epoch": 0.869859046147905, "grad_norm": 0.6908925175666809, "learning_rate": 6.95498260446922e-05, "loss": 0.6379, "step": 4505 }, { "epoch": 0.8700521336165283, "grad_norm": 1.0124554634094238, "learning_rate": 6.953431593868955e-05, "loss": 0.6393, "step": 4506 }, { "epoch": 0.8702452210851516, "grad_norm": 0.695329487323761, "learning_rate": 6.95188036140831e-05, "loss": 0.6676, "step": 4507 }, { "epoch": 0.8704383085537749, "grad_norm": 0.7081416249275208, "learning_rate": 6.950328907263467e-05, "loss": 0.6656, "step": 4508 }, { "epoch": 0.8706313960223981, "grad_norm": 0.4955194890499115, "learning_rate": 6.948777231610631e-05, "loss": 0.6093, "step": 4509 }, { "epoch": 0.8708244834910215, "grad_norm": 1.4666061401367188, "learning_rate": 6.947225334626033e-05, "loss": 0.5782, "step": 4510 }, { "epoch": 0.8710175709596447, "grad_norm": 1.3877815008163452, "learning_rate": 6.94567321648593e-05, "loss": 0.6258, "step": 4511 }, { "epoch": 0.8712106584282681, "grad_norm": 1.0996912717819214, "learning_rate": 6.944120877366604e-05, "loss": 0.6359, "step": 4512 }, { "epoch": 0.8714037458968913, "grad_norm": 0.6708475947380066, "learning_rate": 6.942568317444358e-05, "loss": 0.647, "step": 4513 }, { "epoch": 0.8715968333655145, "grad_norm": 1.1896133422851562, "learning_rate": 6.941015536895528e-05, "loss": 0.6423, "step": 4514 }, { "epoch": 0.8717899208341379, "grad_norm": 0.44012805819511414, "learning_rate": 6.939462535896466e-05, "loss": 0.5746, "step": 4515 }, { "epoch": 0.8719830083027611, "grad_norm": 0.6881606578826904, "learning_rate": 6.937909314623556e-05, "loss": 0.6883, "step": 4516 }, { "epoch": 0.8721760957713844, "grad_norm": 0.8997504115104675, "learning_rate": 6.936355873253206e-05, "loss": 0.6396, "step": 4517 }, { "epoch": 0.8723691832400077, "grad_norm": 0.5464752316474915, "learning_rate": 6.934802211961843e-05, "loss": 0.6442, "step": 4518 }, { "epoch": 0.872562270708631, "grad_norm": 0.6429591774940491, "learning_rate": 6.933248330925932e-05, "loss": 0.6514, "step": 4519 }, { "epoch": 0.8727553581772542, "grad_norm": 0.5166534781455994, "learning_rate": 6.931694230321945e-05, "loss": 0.6937, "step": 4520 }, { "epoch": 0.8729484456458776, "grad_norm": 0.6237115263938904, "learning_rate": 6.930139910326393e-05, "loss": 0.673, "step": 4521 }, { "epoch": 0.8731415331145008, "grad_norm": 0.7250605821609497, "learning_rate": 6.928585371115808e-05, "loss": 0.6396, "step": 4522 }, { "epoch": 0.8733346205831242, "grad_norm": 0.8298479318618774, "learning_rate": 6.927030612866747e-05, "loss": 0.6401, "step": 4523 }, { "epoch": 0.8735277080517474, "grad_norm": 0.7469162940979004, "learning_rate": 6.925475635755785e-05, "loss": 0.7102, "step": 4524 }, { "epoch": 0.8737207955203707, "grad_norm": 0.5978951454162598, "learning_rate": 6.923920439959537e-05, "loss": 0.5942, "step": 4525 }, { "epoch": 0.873913882988994, "grad_norm": 1.2104426622390747, "learning_rate": 6.922365025654628e-05, "loss": 0.5913, "step": 4526 }, { "epoch": 0.8741069704576173, "grad_norm": 0.5999271273612976, "learning_rate": 6.920809393017716e-05, "loss": 0.6563, "step": 4527 }, { "epoch": 0.8743000579262405, "grad_norm": 0.8190325498580933, "learning_rate": 6.919253542225479e-05, "loss": 0.6652, "step": 4528 }, { "epoch": 0.8744931453948639, "grad_norm": 0.5693963170051575, "learning_rate": 6.917697473454624e-05, "loss": 0.6523, "step": 4529 }, { "epoch": 0.8746862328634871, "grad_norm": 1.3084132671356201, "learning_rate": 6.916141186881882e-05, "loss": 0.6451, "step": 4530 }, { "epoch": 0.8748793203321105, "grad_norm": 0.7634469866752625, "learning_rate": 6.914584682684007e-05, "loss": 0.6143, "step": 4531 }, { "epoch": 0.8750724078007337, "grad_norm": 0.5933704376220703, "learning_rate": 6.913027961037776e-05, "loss": 0.693, "step": 4532 }, { "epoch": 0.875265495269357, "grad_norm": 1.364932656288147, "learning_rate": 6.911471022119998e-05, "loss": 0.6186, "step": 4533 }, { "epoch": 0.8754585827379803, "grad_norm": 0.9339388012886047, "learning_rate": 6.9099138661075e-05, "loss": 0.6273, "step": 4534 }, { "epoch": 0.8756516702066036, "grad_norm": 0.6394757032394409, "learning_rate": 6.908356493177131e-05, "loss": 0.7002, "step": 4535 }, { "epoch": 0.8758447576752268, "grad_norm": 1.030130386352539, "learning_rate": 6.906798903505776e-05, "loss": 0.6759, "step": 4536 }, { "epoch": 0.8760378451438502, "grad_norm": 0.5877071022987366, "learning_rate": 6.905241097270334e-05, "loss": 0.5881, "step": 4537 }, { "epoch": 0.8762309326124734, "grad_norm": 1.0236424207687378, "learning_rate": 6.903683074647731e-05, "loss": 0.6808, "step": 4538 }, { "epoch": 0.8764240200810968, "grad_norm": 0.8845412731170654, "learning_rate": 6.902124835814922e-05, "loss": 0.6666, "step": 4539 }, { "epoch": 0.87661710754972, "grad_norm": 1.0208134651184082, "learning_rate": 6.900566380948884e-05, "loss": 0.5872, "step": 4540 }, { "epoch": 0.8768101950183433, "grad_norm": 0.5519770979881287, "learning_rate": 6.899007710226614e-05, "loss": 0.5671, "step": 4541 }, { "epoch": 0.8770032824869666, "grad_norm": 1.3116523027420044, "learning_rate": 6.897448823825142e-05, "loss": 0.5934, "step": 4542 }, { "epoch": 0.8771963699555899, "grad_norm": 0.5945027470588684, "learning_rate": 6.895889721921514e-05, "loss": 0.63, "step": 4543 }, { "epoch": 0.8773894574242131, "grad_norm": 0.5481628179550171, "learning_rate": 6.894330404692808e-05, "loss": 0.6651, "step": 4544 }, { "epoch": 0.8775825448928365, "grad_norm": 1.7462215423583984, "learning_rate": 6.892770872316123e-05, "loss": 0.6274, "step": 4545 }, { "epoch": 0.8777756323614597, "grad_norm": 1.639256238937378, "learning_rate": 6.891211124968577e-05, "loss": 0.6165, "step": 4546 }, { "epoch": 0.8779687198300831, "grad_norm": 0.6672441363334656, "learning_rate": 6.889651162827325e-05, "loss": 0.7172, "step": 4547 }, { "epoch": 0.8781618072987063, "grad_norm": 0.6141162514686584, "learning_rate": 6.888090986069534e-05, "loss": 0.5607, "step": 4548 }, { "epoch": 0.8783548947673296, "grad_norm": 1.3427715301513672, "learning_rate": 6.886530594872402e-05, "loss": 0.6251, "step": 4549 }, { "epoch": 0.8785479822359529, "grad_norm": 0.7434550523757935, "learning_rate": 6.88496998941315e-05, "loss": 0.7307, "step": 4550 }, { "epoch": 0.8787410697045762, "grad_norm": 0.5599385499954224, "learning_rate": 6.883409169869028e-05, "loss": 0.6372, "step": 4551 }, { "epoch": 0.8789341571731994, "grad_norm": 0.7189441323280334, "learning_rate": 6.881848136417297e-05, "loss": 0.6583, "step": 4552 }, { "epoch": 0.8791272446418228, "grad_norm": 0.8551734089851379, "learning_rate": 6.880286889235255e-05, "loss": 0.7033, "step": 4553 }, { "epoch": 0.879320332110446, "grad_norm": 0.6267783045768738, "learning_rate": 6.878725428500224e-05, "loss": 0.5715, "step": 4554 }, { "epoch": 0.8795134195790694, "grad_norm": 1.2844675779342651, "learning_rate": 6.87716375438954e-05, "loss": 0.6447, "step": 4555 }, { "epoch": 0.8797065070476926, "grad_norm": 0.6724733710289001, "learning_rate": 6.875601867080572e-05, "loss": 0.6478, "step": 4556 }, { "epoch": 0.8798995945163159, "grad_norm": 0.6470775604248047, "learning_rate": 6.874039766750713e-05, "loss": 0.6022, "step": 4557 }, { "epoch": 0.8800926819849392, "grad_norm": 0.5689084529876709, "learning_rate": 6.872477453577375e-05, "loss": 0.7286, "step": 4558 }, { "epoch": 0.8802857694535625, "grad_norm": 0.6075646877288818, "learning_rate": 6.870914927737999e-05, "loss": 0.613, "step": 4559 }, { "epoch": 0.8804788569221857, "grad_norm": 0.6229695081710815, "learning_rate": 6.869352189410047e-05, "loss": 0.6513, "step": 4560 }, { "epoch": 0.8806719443908091, "grad_norm": 0.7350867986679077, "learning_rate": 6.86778923877101e-05, "loss": 0.6172, "step": 4561 }, { "epoch": 0.8808650318594323, "grad_norm": 0.6290453672409058, "learning_rate": 6.866226075998395e-05, "loss": 0.6218, "step": 4562 }, { "epoch": 0.8810581193280556, "grad_norm": 2.6398890018463135, "learning_rate": 6.86466270126974e-05, "loss": 0.7096, "step": 4563 }, { "epoch": 0.8812512067966789, "grad_norm": 0.7029866576194763, "learning_rate": 6.863099114762603e-05, "loss": 0.6358, "step": 4564 }, { "epoch": 0.8814442942653021, "grad_norm": 0.6251575946807861, "learning_rate": 6.861535316654572e-05, "loss": 0.6583, "step": 4565 }, { "epoch": 0.8816373817339255, "grad_norm": 0.7476343512535095, "learning_rate": 6.859971307123251e-05, "loss": 0.601, "step": 4566 }, { "epoch": 0.8818304692025487, "grad_norm": 0.7135818004608154, "learning_rate": 6.858407086346273e-05, "loss": 0.6192, "step": 4567 }, { "epoch": 0.882023556671172, "grad_norm": 0.6767486333847046, "learning_rate": 6.856842654501295e-05, "loss": 0.6404, "step": 4568 }, { "epoch": 0.8822166441397953, "grad_norm": 0.5115534663200378, "learning_rate": 6.855278011765995e-05, "loss": 0.6034, "step": 4569 }, { "epoch": 0.8824097316084186, "grad_norm": 0.7446635365486145, "learning_rate": 6.853713158318078e-05, "loss": 0.6291, "step": 4570 }, { "epoch": 0.8826028190770419, "grad_norm": 0.8530570268630981, "learning_rate": 6.85214809433527e-05, "loss": 0.6512, "step": 4571 }, { "epoch": 0.8827959065456652, "grad_norm": 0.8484787940979004, "learning_rate": 6.850582819995327e-05, "loss": 0.6194, "step": 4572 }, { "epoch": 0.8829889940142884, "grad_norm": 0.8695933818817139, "learning_rate": 6.84901733547602e-05, "loss": 0.6654, "step": 4573 }, { "epoch": 0.8831820814829118, "grad_norm": 0.6809435486793518, "learning_rate": 6.847451640955151e-05, "loss": 0.6921, "step": 4574 }, { "epoch": 0.883375168951535, "grad_norm": 0.8609122037887573, "learning_rate": 6.845885736610543e-05, "loss": 0.6819, "step": 4575 }, { "epoch": 0.8835682564201583, "grad_norm": 0.6134507060050964, "learning_rate": 6.844319622620039e-05, "loss": 0.6823, "step": 4576 }, { "epoch": 0.8837613438887816, "grad_norm": 1.1709365844726562, "learning_rate": 6.842753299161516e-05, "loss": 0.6213, "step": 4577 }, { "epoch": 0.8839544313574049, "grad_norm": 0.7320404052734375, "learning_rate": 6.841186766412865e-05, "loss": 0.7081, "step": 4578 }, { "epoch": 0.8841475188260282, "grad_norm": 0.6142033338546753, "learning_rate": 6.839620024552008e-05, "loss": 0.6089, "step": 4579 }, { "epoch": 0.8843406062946515, "grad_norm": 1.5938800573349, "learning_rate": 6.838053073756882e-05, "loss": 0.6639, "step": 4580 }, { "epoch": 0.8845336937632747, "grad_norm": 0.7841917872428894, "learning_rate": 6.836485914205455e-05, "loss": 0.6765, "step": 4581 }, { "epoch": 0.8847267812318981, "grad_norm": 0.7257996797561646, "learning_rate": 6.834918546075719e-05, "loss": 0.6764, "step": 4582 }, { "epoch": 0.8849198687005213, "grad_norm": 0.5938121676445007, "learning_rate": 6.833350969545686e-05, "loss": 0.5639, "step": 4583 }, { "epoch": 0.8851129561691446, "grad_norm": 0.9914392232894897, "learning_rate": 6.83178318479339e-05, "loss": 0.6566, "step": 4584 }, { "epoch": 0.8853060436377679, "grad_norm": 0.8945817947387695, "learning_rate": 6.830215191996897e-05, "loss": 0.595, "step": 4585 }, { "epoch": 0.8854991311063912, "grad_norm": 0.6019495129585266, "learning_rate": 6.828646991334287e-05, "loss": 0.6527, "step": 4586 }, { "epoch": 0.8856922185750145, "grad_norm": 0.7125729322433472, "learning_rate": 6.82707858298367e-05, "loss": 0.6189, "step": 4587 }, { "epoch": 0.8858853060436378, "grad_norm": 1.0576165914535522, "learning_rate": 6.825509967123179e-05, "loss": 0.679, "step": 4588 }, { "epoch": 0.886078393512261, "grad_norm": 0.7496209740638733, "learning_rate": 6.823941143930963e-05, "loss": 0.7126, "step": 4589 }, { "epoch": 0.8862714809808844, "grad_norm": 0.8259031176567078, "learning_rate": 6.822372113585208e-05, "loss": 0.6387, "step": 4590 }, { "epoch": 0.8864645684495076, "grad_norm": 0.6874018907546997, "learning_rate": 6.820802876264112e-05, "loss": 0.6265, "step": 4591 }, { "epoch": 0.886657655918131, "grad_norm": 0.5410770773887634, "learning_rate": 6.8192334321459e-05, "loss": 0.6338, "step": 4592 }, { "epoch": 0.8868507433867542, "grad_norm": 0.6544023156166077, "learning_rate": 6.817663781408824e-05, "loss": 0.6743, "step": 4593 }, { "epoch": 0.8870438308553775, "grad_norm": 0.5541006922721863, "learning_rate": 6.816093924231156e-05, "loss": 0.623, "step": 4594 }, { "epoch": 0.8872369183240008, "grad_norm": 0.6012977957725525, "learning_rate": 6.814523860791189e-05, "loss": 0.6703, "step": 4595 }, { "epoch": 0.8874300057926241, "grad_norm": 0.6204688549041748, "learning_rate": 6.812953591267245e-05, "loss": 0.6821, "step": 4596 }, { "epoch": 0.8876230932612473, "grad_norm": 0.5613247752189636, "learning_rate": 6.811383115837668e-05, "loss": 0.7002, "step": 4597 }, { "epoch": 0.8878161807298707, "grad_norm": 0.9611087441444397, "learning_rate": 6.80981243468082e-05, "loss": 0.7167, "step": 4598 }, { "epoch": 0.8880092681984939, "grad_norm": 0.6000582575798035, "learning_rate": 6.808241547975094e-05, "loss": 0.6172, "step": 4599 }, { "epoch": 0.8882023556671172, "grad_norm": 1.4425179958343506, "learning_rate": 6.806670455898905e-05, "loss": 0.6719, "step": 4600 }, { "epoch": 0.8883954431357405, "grad_norm": 0.6673192977905273, "learning_rate": 6.805099158630684e-05, "loss": 0.5958, "step": 4601 }, { "epoch": 0.8885885306043638, "grad_norm": 0.7776142358779907, "learning_rate": 6.803527656348894e-05, "loss": 0.6104, "step": 4602 }, { "epoch": 0.8887816180729871, "grad_norm": 0.6840052008628845, "learning_rate": 6.801955949232018e-05, "loss": 0.6564, "step": 4603 }, { "epoch": 0.8889747055416104, "grad_norm": 0.5371056199073792, "learning_rate": 6.80038403745856e-05, "loss": 0.6254, "step": 4604 }, { "epoch": 0.8891677930102336, "grad_norm": 0.5354758501052856, "learning_rate": 6.798811921207051e-05, "loss": 0.6156, "step": 4605 }, { "epoch": 0.889360880478857, "grad_norm": 0.582090437412262, "learning_rate": 6.797239600656042e-05, "loss": 0.6202, "step": 4606 }, { "epoch": 0.8895539679474802, "grad_norm": 1.7798964977264404, "learning_rate": 6.795667075984113e-05, "loss": 0.7075, "step": 4607 }, { "epoch": 0.8897470554161035, "grad_norm": 0.9182279706001282, "learning_rate": 6.794094347369857e-05, "loss": 0.6558, "step": 4608 }, { "epoch": 0.8899401428847268, "grad_norm": 0.8751780986785889, "learning_rate": 6.7925214149919e-05, "loss": 0.6027, "step": 4609 }, { "epoch": 0.89013323035335, "grad_norm": 0.7295486330986023, "learning_rate": 6.790948279028886e-05, "loss": 0.6202, "step": 4610 }, { "epoch": 0.8903263178219734, "grad_norm": 0.6659254431724548, "learning_rate": 6.789374939659486e-05, "loss": 0.6389, "step": 4611 }, { "epoch": 0.8905194052905966, "grad_norm": 0.6495497226715088, "learning_rate": 6.787801397062388e-05, "loss": 0.6886, "step": 4612 }, { "epoch": 0.8907124927592199, "grad_norm": 0.6363178491592407, "learning_rate": 6.786227651416308e-05, "loss": 0.6369, "step": 4613 }, { "epoch": 0.8909055802278432, "grad_norm": 1.258786678314209, "learning_rate": 6.784653702899983e-05, "loss": 0.6787, "step": 4614 }, { "epoch": 0.8910986676964665, "grad_norm": 0.7554261088371277, "learning_rate": 6.783079551692177e-05, "loss": 0.6279, "step": 4615 }, { "epoch": 0.8912917551650897, "grad_norm": 1.1521495580673218, "learning_rate": 6.781505197971669e-05, "loss": 0.59, "step": 4616 }, { "epoch": 0.8914848426337131, "grad_norm": 3.263683557510376, "learning_rate": 6.779930641917267e-05, "loss": 0.6463, "step": 4617 }, { "epoch": 0.8916779301023363, "grad_norm": 1.3134212493896484, "learning_rate": 6.778355883707804e-05, "loss": 0.6694, "step": 4618 }, { "epoch": 0.8918710175709597, "grad_norm": 0.7594218254089355, "learning_rate": 6.776780923522128e-05, "loss": 0.5442, "step": 4619 }, { "epoch": 0.8920641050395829, "grad_norm": 0.9508801698684692, "learning_rate": 6.775205761539119e-05, "loss": 0.6328, "step": 4620 }, { "epoch": 0.8922571925082062, "grad_norm": 0.6221765279769897, "learning_rate": 6.773630397937672e-05, "loss": 0.6394, "step": 4621 }, { "epoch": 0.8924502799768295, "grad_norm": 0.919313907623291, "learning_rate": 6.772054832896712e-05, "loss": 0.617, "step": 4622 }, { "epoch": 0.8926433674454528, "grad_norm": 0.7842068672180176, "learning_rate": 6.770479066595178e-05, "loss": 0.6294, "step": 4623 }, { "epoch": 0.892836454914076, "grad_norm": 0.5024833083152771, "learning_rate": 6.76890309921204e-05, "loss": 0.6017, "step": 4624 }, { "epoch": 0.8930295423826994, "grad_norm": 0.4059761166572571, "learning_rate": 6.767326930926292e-05, "loss": 0.6116, "step": 4625 }, { "epoch": 0.8932226298513226, "grad_norm": 0.9284073114395142, "learning_rate": 6.765750561916941e-05, "loss": 0.6117, "step": 4626 }, { "epoch": 0.893415717319946, "grad_norm": 0.734467089176178, "learning_rate": 6.764173992363023e-05, "loss": 0.5984, "step": 4627 }, { "epoch": 0.8936088047885692, "grad_norm": 0.4602697789669037, "learning_rate": 6.7625972224436e-05, "loss": 0.5708, "step": 4628 }, { "epoch": 0.8938018922571925, "grad_norm": 0.6801589727401733, "learning_rate": 6.76102025233775e-05, "loss": 0.6626, "step": 4629 }, { "epoch": 0.8939949797258158, "grad_norm": 0.5258141160011292, "learning_rate": 6.759443082224579e-05, "loss": 0.6539, "step": 4630 }, { "epoch": 0.8941880671944391, "grad_norm": 0.9753859639167786, "learning_rate": 6.757865712283214e-05, "loss": 0.7244, "step": 4631 }, { "epoch": 0.8943811546630623, "grad_norm": 0.740858793258667, "learning_rate": 6.756288142692802e-05, "loss": 0.6223, "step": 4632 }, { "epoch": 0.8945742421316857, "grad_norm": 0.4907265603542328, "learning_rate": 6.754710373632516e-05, "loss": 0.6804, "step": 4633 }, { "epoch": 0.8947673296003089, "grad_norm": 0.6997267007827759, "learning_rate": 6.753132405281551e-05, "loss": 0.742, "step": 4634 }, { "epoch": 0.8949604170689323, "grad_norm": 0.49873873591423035, "learning_rate": 6.751554237819122e-05, "loss": 0.5945, "step": 4635 }, { "epoch": 0.8951535045375555, "grad_norm": 0.6248388886451721, "learning_rate": 6.749975871424472e-05, "loss": 0.675, "step": 4636 }, { "epoch": 0.8953465920061788, "grad_norm": 1.1016077995300293, "learning_rate": 6.748397306276863e-05, "loss": 0.6767, "step": 4637 }, { "epoch": 0.8955396794748021, "grad_norm": 3.2900521755218506, "learning_rate": 6.746818542555577e-05, "loss": 0.6331, "step": 4638 }, { "epoch": 0.8957327669434254, "grad_norm": 1.6435434818267822, "learning_rate": 6.745239580439926e-05, "loss": 0.6385, "step": 4639 }, { "epoch": 0.8959258544120486, "grad_norm": 1.0987555980682373, "learning_rate": 6.743660420109238e-05, "loss": 0.6463, "step": 4640 }, { "epoch": 0.896118941880672, "grad_norm": 1.1642335653305054, "learning_rate": 6.742081061742864e-05, "loss": 0.6317, "step": 4641 }, { "epoch": 0.8963120293492952, "grad_norm": 0.603561282157898, "learning_rate": 6.740501505520183e-05, "loss": 0.5855, "step": 4642 }, { "epoch": 0.8965051168179186, "grad_norm": 1.0436558723449707, "learning_rate": 6.738921751620588e-05, "loss": 0.7199, "step": 4643 }, { "epoch": 0.8966982042865418, "grad_norm": 0.6551451683044434, "learning_rate": 6.737341800223502e-05, "loss": 0.6487, "step": 4644 }, { "epoch": 0.8968912917551651, "grad_norm": 0.5204874277114868, "learning_rate": 6.735761651508368e-05, "loss": 0.5918, "step": 4645 }, { "epoch": 0.8970843792237884, "grad_norm": 0.7154489159584045, "learning_rate": 6.73418130565465e-05, "loss": 0.5778, "step": 4646 }, { "epoch": 0.8972774666924117, "grad_norm": 0.6489051580429077, "learning_rate": 6.732600762841834e-05, "loss": 0.6676, "step": 4647 }, { "epoch": 0.8974705541610349, "grad_norm": 0.6734768152236938, "learning_rate": 6.73102002324943e-05, "loss": 0.6917, "step": 4648 }, { "epoch": 0.8976636416296583, "grad_norm": 0.6043325662612915, "learning_rate": 6.729439087056971e-05, "loss": 0.7228, "step": 4649 }, { "epoch": 0.8978567290982815, "grad_norm": 0.6807748675346375, "learning_rate": 6.727857954444012e-05, "loss": 0.6206, "step": 4650 }, { "epoch": 0.8980498165669049, "grad_norm": 1.7725549936294556, "learning_rate": 6.72627662559013e-05, "loss": 0.6325, "step": 4651 }, { "epoch": 0.8982429040355281, "grad_norm": 0.895638644695282, "learning_rate": 6.724695100674921e-05, "loss": 0.6479, "step": 4652 }, { "epoch": 0.8984359915041514, "grad_norm": 0.5637390613555908, "learning_rate": 6.723113379878006e-05, "loss": 0.613, "step": 4653 }, { "epoch": 0.8986290789727747, "grad_norm": 1.1343525648117065, "learning_rate": 6.721531463379033e-05, "loss": 0.6699, "step": 4654 }, { "epoch": 0.898822166441398, "grad_norm": 0.671237051486969, "learning_rate": 6.719949351357664e-05, "loss": 0.7013, "step": 4655 }, { "epoch": 0.8990152539100212, "grad_norm": 0.7269829511642456, "learning_rate": 6.718367043993588e-05, "loss": 0.6804, "step": 4656 }, { "epoch": 0.8992083413786445, "grad_norm": 0.5050559639930725, "learning_rate": 6.716784541466514e-05, "loss": 0.6992, "step": 4657 }, { "epoch": 0.8994014288472678, "grad_norm": 0.8414682149887085, "learning_rate": 6.715201843956176e-05, "loss": 0.6442, "step": 4658 }, { "epoch": 0.899594516315891, "grad_norm": 0.8195832371711731, "learning_rate": 6.713618951642327e-05, "loss": 0.7356, "step": 4659 }, { "epoch": 0.8997876037845144, "grad_norm": 0.795860230922699, "learning_rate": 6.712035864704744e-05, "loss": 0.6008, "step": 4660 }, { "epoch": 0.8999806912531376, "grad_norm": 0.6596828699111938, "learning_rate": 6.710452583323226e-05, "loss": 0.6294, "step": 4661 }, { "epoch": 0.900173778721761, "grad_norm": 0.4829607605934143, "learning_rate": 6.708869107677592e-05, "loss": 0.6356, "step": 4662 }, { "epoch": 0.9003668661903842, "grad_norm": 0.5384283661842346, "learning_rate": 6.707285437947687e-05, "loss": 0.7591, "step": 4663 }, { "epoch": 0.9005599536590075, "grad_norm": 0.7231944799423218, "learning_rate": 6.705701574313377e-05, "loss": 0.6661, "step": 4664 }, { "epoch": 0.9007530411276308, "grad_norm": 1.3806986808776855, "learning_rate": 6.704117516954545e-05, "loss": 0.6514, "step": 4665 }, { "epoch": 0.9009461285962541, "grad_norm": 0.5876256227493286, "learning_rate": 6.702533266051098e-05, "loss": 0.6214, "step": 4666 }, { "epoch": 0.9011392160648773, "grad_norm": 0.6442005634307861, "learning_rate": 6.700948821782975e-05, "loss": 0.6393, "step": 4667 }, { "epoch": 0.9013323035335007, "grad_norm": 0.6558557152748108, "learning_rate": 6.699364184330122e-05, "loss": 0.6068, "step": 4668 }, { "epoch": 0.9015253910021239, "grad_norm": 0.6411159038543701, "learning_rate": 6.697779353872515e-05, "loss": 0.6383, "step": 4669 }, { "epoch": 0.9017184784707473, "grad_norm": 0.45706358551979065, "learning_rate": 6.696194330590151e-05, "loss": 0.5889, "step": 4670 }, { "epoch": 0.9019115659393705, "grad_norm": 0.6886022090911865, "learning_rate": 6.694609114663052e-05, "loss": 0.5977, "step": 4671 }, { "epoch": 0.9021046534079938, "grad_norm": 0.5828707814216614, "learning_rate": 6.693023706271252e-05, "loss": 0.6354, "step": 4672 }, { "epoch": 0.9022977408766171, "grad_norm": 1.7378995418548584, "learning_rate": 6.691438105594816e-05, "loss": 0.6397, "step": 4673 }, { "epoch": 0.9024908283452404, "grad_norm": 0.5950663089752197, "learning_rate": 6.689852312813831e-05, "loss": 0.6878, "step": 4674 }, { "epoch": 0.9026839158138636, "grad_norm": 1.149642825126648, "learning_rate": 6.6882663281084e-05, "loss": 0.6586, "step": 4675 }, { "epoch": 0.902877003282487, "grad_norm": 0.7290835380554199, "learning_rate": 6.68668015165865e-05, "loss": 0.6222, "step": 4676 }, { "epoch": 0.9030700907511102, "grad_norm": 0.4786348342895508, "learning_rate": 6.685093783644733e-05, "loss": 0.586, "step": 4677 }, { "epoch": 0.9032631782197336, "grad_norm": 0.807048499584198, "learning_rate": 6.683507224246819e-05, "loss": 0.6696, "step": 4678 }, { "epoch": 0.9034562656883568, "grad_norm": 0.9345008730888367, "learning_rate": 6.6819204736451e-05, "loss": 0.5849, "step": 4679 }, { "epoch": 0.9036493531569801, "grad_norm": 0.5625839233398438, "learning_rate": 6.680333532019792e-05, "loss": 0.609, "step": 4680 }, { "epoch": 0.9038424406256034, "grad_norm": 0.6821984648704529, "learning_rate": 6.678746399551132e-05, "loss": 0.6475, "step": 4681 }, { "epoch": 0.9040355280942267, "grad_norm": 0.4927162528038025, "learning_rate": 6.677159076419375e-05, "loss": 0.6576, "step": 4682 }, { "epoch": 0.90422861556285, "grad_norm": 1.006630539894104, "learning_rate": 6.675571562804805e-05, "loss": 0.6646, "step": 4683 }, { "epoch": 0.9044217030314733, "grad_norm": 0.9869152903556824, "learning_rate": 6.673983858887718e-05, "loss": 0.6379, "step": 4684 }, { "epoch": 0.9046147905000965, "grad_norm": 0.6686213612556458, "learning_rate": 6.672395964848446e-05, "loss": 0.6468, "step": 4685 }, { "epoch": 0.9048078779687199, "grad_norm": 1.0919142961502075, "learning_rate": 6.670807880867322e-05, "loss": 0.6843, "step": 4686 }, { "epoch": 0.9050009654373431, "grad_norm": 0.544529139995575, "learning_rate": 6.66921960712472e-05, "loss": 0.6254, "step": 4687 }, { "epoch": 0.9051940529059664, "grad_norm": 0.4948018491268158, "learning_rate": 6.667631143801027e-05, "loss": 0.6233, "step": 4688 }, { "epoch": 0.9053871403745897, "grad_norm": 0.923089861869812, "learning_rate": 6.666042491076649e-05, "loss": 0.6538, "step": 4689 }, { "epoch": 0.905580227843213, "grad_norm": 0.6880934238433838, "learning_rate": 6.664453649132018e-05, "loss": 0.6369, "step": 4690 }, { "epoch": 0.9057733153118362, "grad_norm": 1.6368008852005005, "learning_rate": 6.662864618147588e-05, "loss": 0.6239, "step": 4691 }, { "epoch": 0.9059664027804596, "grad_norm": 0.6904342174530029, "learning_rate": 6.661275398303831e-05, "loss": 0.6593, "step": 4692 }, { "epoch": 0.9061594902490828, "grad_norm": 0.6776862144470215, "learning_rate": 6.659685989781243e-05, "loss": 0.6722, "step": 4693 }, { "epoch": 0.9063525777177062, "grad_norm": 0.5798628330230713, "learning_rate": 6.65809639276034e-05, "loss": 0.6641, "step": 4694 }, { "epoch": 0.9065456651863294, "grad_norm": 0.7456169128417969, "learning_rate": 6.656506607421658e-05, "loss": 0.6199, "step": 4695 }, { "epoch": 0.9067387526549527, "grad_norm": 0.5556668639183044, "learning_rate": 6.654916633945762e-05, "loss": 0.6845, "step": 4696 }, { "epoch": 0.906931840123576, "grad_norm": 0.5843209624290466, "learning_rate": 6.653326472513229e-05, "loss": 0.6394, "step": 4697 }, { "epoch": 0.9071249275921993, "grad_norm": 0.6329046487808228, "learning_rate": 6.651736123304658e-05, "loss": 0.6981, "step": 4698 }, { "epoch": 0.9073180150608225, "grad_norm": 1.0749260187149048, "learning_rate": 6.650145586500677e-05, "loss": 0.6619, "step": 4699 }, { "epoch": 0.9075111025294459, "grad_norm": 0.599242627620697, "learning_rate": 6.648554862281933e-05, "loss": 0.6062, "step": 4700 }, { "epoch": 0.9077041899980691, "grad_norm": 0.5921893119812012, "learning_rate": 6.646963950829084e-05, "loss": 0.7048, "step": 4701 }, { "epoch": 0.9078972774666925, "grad_norm": 0.4375337064266205, "learning_rate": 6.645372852322824e-05, "loss": 0.6565, "step": 4702 }, { "epoch": 0.9080903649353157, "grad_norm": 0.39926379919052124, "learning_rate": 6.64378156694386e-05, "loss": 0.62, "step": 4703 }, { "epoch": 0.9082834524039389, "grad_norm": 0.6320896744728088, "learning_rate": 6.642190094872917e-05, "loss": 0.6328, "step": 4704 }, { "epoch": 0.9084765398725623, "grad_norm": 0.5261985063552856, "learning_rate": 6.640598436290755e-05, "loss": 0.6365, "step": 4705 }, { "epoch": 0.9086696273411855, "grad_norm": 0.48094889521598816, "learning_rate": 6.639006591378138e-05, "loss": 0.6252, "step": 4706 }, { "epoch": 0.9088627148098088, "grad_norm": 0.781934916973114, "learning_rate": 6.637414560315862e-05, "loss": 0.59, "step": 4707 }, { "epoch": 0.9090558022784321, "grad_norm": 0.5131605863571167, "learning_rate": 6.635822343284745e-05, "loss": 0.7035, "step": 4708 }, { "epoch": 0.9092488897470554, "grad_norm": 3.125812292098999, "learning_rate": 6.634229940465615e-05, "loss": 0.6182, "step": 4709 }, { "epoch": 0.9094419772156787, "grad_norm": 0.3803419768810272, "learning_rate": 6.632637352039335e-05, "loss": 0.5851, "step": 4710 }, { "epoch": 0.909635064684302, "grad_norm": 0.9441384077072144, "learning_rate": 6.631044578186782e-05, "loss": 0.6608, "step": 4711 }, { "epoch": 0.9098281521529252, "grad_norm": 0.7947547435760498, "learning_rate": 6.629451619088851e-05, "loss": 0.6539, "step": 4712 }, { "epoch": 0.9100212396215486, "grad_norm": 0.6201795935630798, "learning_rate": 6.627858474926467e-05, "loss": 0.5818, "step": 4713 }, { "epoch": 0.9102143270901718, "grad_norm": 4.897652626037598, "learning_rate": 6.626265145880567e-05, "loss": 0.6119, "step": 4714 }, { "epoch": 0.9104074145587951, "grad_norm": 0.5062997341156006, "learning_rate": 6.624671632132114e-05, "loss": 0.6446, "step": 4715 }, { "epoch": 0.9106005020274184, "grad_norm": 0.53565913438797, "learning_rate": 6.623077933862088e-05, "loss": 0.6397, "step": 4716 }, { "epoch": 0.9107935894960417, "grad_norm": 0.5721519589424133, "learning_rate": 6.6214840512515e-05, "loss": 0.692, "step": 4717 }, { "epoch": 0.910986676964665, "grad_norm": 0.6996158361434937, "learning_rate": 6.619889984481367e-05, "loss": 0.6877, "step": 4718 }, { "epoch": 0.9111797644332883, "grad_norm": 0.5720805525779724, "learning_rate": 6.61829573373274e-05, "loss": 0.6686, "step": 4719 }, { "epoch": 0.9113728519019115, "grad_norm": 1.2027949094772339, "learning_rate": 6.616701299186682e-05, "loss": 0.6412, "step": 4720 }, { "epoch": 0.9115659393705349, "grad_norm": 0.5787626504898071, "learning_rate": 6.615106681024284e-05, "loss": 0.6712, "step": 4721 }, { "epoch": 0.9117590268391581, "grad_norm": 0.583477258682251, "learning_rate": 6.61351187942665e-05, "loss": 0.581, "step": 4722 }, { "epoch": 0.9119521143077814, "grad_norm": 0.752662718296051, "learning_rate": 6.611916894574914e-05, "loss": 0.5695, "step": 4723 }, { "epoch": 0.9121452017764047, "grad_norm": 0.564978301525116, "learning_rate": 6.610321726650222e-05, "loss": 0.626, "step": 4724 }, { "epoch": 0.912338289245028, "grad_norm": 0.5975714921951294, "learning_rate": 6.608726375833744e-05, "loss": 0.6333, "step": 4725 }, { "epoch": 0.9125313767136513, "grad_norm": 0.49652305245399475, "learning_rate": 6.607130842306675e-05, "loss": 0.6379, "step": 4726 }, { "epoch": 0.9127244641822746, "grad_norm": 0.6739470958709717, "learning_rate": 6.605535126250226e-05, "loss": 0.5693, "step": 4727 }, { "epoch": 0.9129175516508978, "grad_norm": 1.0988364219665527, "learning_rate": 6.603939227845628e-05, "loss": 0.6501, "step": 4728 }, { "epoch": 0.9131106391195212, "grad_norm": 0.7017073035240173, "learning_rate": 6.602343147274135e-05, "loss": 0.58, "step": 4729 }, { "epoch": 0.9133037265881444, "grad_norm": 0.5377480387687683, "learning_rate": 6.600746884717024e-05, "loss": 0.6651, "step": 4730 }, { "epoch": 0.9134968140567677, "grad_norm": 0.8838604688644409, "learning_rate": 6.599150440355591e-05, "loss": 0.6679, "step": 4731 }, { "epoch": 0.913689901525391, "grad_norm": 0.8022592067718506, "learning_rate": 6.597553814371144e-05, "loss": 0.5883, "step": 4732 }, { "epoch": 0.9138829889940143, "grad_norm": 4.601982116699219, "learning_rate": 6.595957006945029e-05, "loss": 0.6011, "step": 4733 }, { "epoch": 0.9140760764626376, "grad_norm": 0.5978323817253113, "learning_rate": 6.594360018258596e-05, "loss": 0.5909, "step": 4734 }, { "epoch": 0.9142691639312609, "grad_norm": 0.9080562591552734, "learning_rate": 6.592762848493226e-05, "loss": 0.6356, "step": 4735 }, { "epoch": 0.9144622513998841, "grad_norm": 0.783962070941925, "learning_rate": 6.591165497830313e-05, "loss": 0.7417, "step": 4736 }, { "epoch": 0.9146553388685075, "grad_norm": 0.7491979002952576, "learning_rate": 6.589567966451283e-05, "loss": 0.5894, "step": 4737 }, { "epoch": 0.9148484263371307, "grad_norm": 0.6336752772331238, "learning_rate": 6.587970254537567e-05, "loss": 0.6621, "step": 4738 }, { "epoch": 0.915041513805754, "grad_norm": 1.5352725982666016, "learning_rate": 6.58637236227063e-05, "loss": 0.6306, "step": 4739 }, { "epoch": 0.9152346012743773, "grad_norm": 0.9222684502601624, "learning_rate": 6.58477428983195e-05, "loss": 0.6577, "step": 4740 }, { "epoch": 0.9154276887430006, "grad_norm": 0.6041208505630493, "learning_rate": 6.583176037403029e-05, "loss": 0.6346, "step": 4741 }, { "epoch": 0.9156207762116239, "grad_norm": 0.7149437069892883, "learning_rate": 6.581577605165385e-05, "loss": 0.6485, "step": 4742 }, { "epoch": 0.9158138636802472, "grad_norm": 0.46740633249282837, "learning_rate": 6.579978993300564e-05, "loss": 0.618, "step": 4743 }, { "epoch": 0.9160069511488704, "grad_norm": 0.9312482476234436, "learning_rate": 6.578380201990122e-05, "loss": 0.6121, "step": 4744 }, { "epoch": 0.9162000386174938, "grad_norm": 0.5678781270980835, "learning_rate": 6.576781231415647e-05, "loss": 0.6395, "step": 4745 }, { "epoch": 0.916393126086117, "grad_norm": 0.6318339705467224, "learning_rate": 6.575182081758739e-05, "loss": 0.6519, "step": 4746 }, { "epoch": 0.9165862135547403, "grad_norm": 0.843253493309021, "learning_rate": 6.573582753201018e-05, "loss": 0.609, "step": 4747 }, { "epoch": 0.9167793010233636, "grad_norm": 0.6796379089355469, "learning_rate": 6.571983245924134e-05, "loss": 0.579, "step": 4748 }, { "epoch": 0.9169723884919869, "grad_norm": 5.558284282684326, "learning_rate": 6.570383560109745e-05, "loss": 0.6302, "step": 4749 }, { "epoch": 0.9171654759606102, "grad_norm": 0.5469644665718079, "learning_rate": 6.568783695939535e-05, "loss": 0.6785, "step": 4750 }, { "epoch": 0.9173585634292335, "grad_norm": 0.993573784828186, "learning_rate": 6.567183653595212e-05, "loss": 0.6051, "step": 4751 }, { "epoch": 0.9175516508978567, "grad_norm": 0.7500694990158081, "learning_rate": 6.565583433258497e-05, "loss": 0.5942, "step": 4752 }, { "epoch": 0.91774473836648, "grad_norm": 1.0643538236618042, "learning_rate": 6.563983035111137e-05, "loss": 0.6822, "step": 4753 }, { "epoch": 0.9179378258351033, "grad_norm": 0.7451661229133606, "learning_rate": 6.562382459334894e-05, "loss": 0.728, "step": 4754 }, { "epoch": 0.9181309133037265, "grad_norm": 0.5481401681900024, "learning_rate": 6.560781706111553e-05, "loss": 0.6073, "step": 4755 }, { "epoch": 0.9183240007723499, "grad_norm": 0.8119422197341919, "learning_rate": 6.55918077562292e-05, "loss": 0.5403, "step": 4756 }, { "epoch": 0.9185170882409731, "grad_norm": 0.8692396879196167, "learning_rate": 6.557579668050823e-05, "loss": 0.6543, "step": 4757 }, { "epoch": 0.9187101757095965, "grad_norm": 0.7043238878250122, "learning_rate": 6.555978383577102e-05, "loss": 0.6134, "step": 4758 }, { "epoch": 0.9189032631782197, "grad_norm": 0.6248871684074402, "learning_rate": 6.554376922383626e-05, "loss": 0.6266, "step": 4759 }, { "epoch": 0.919096350646843, "grad_norm": 0.5816067457199097, "learning_rate": 6.552775284652279e-05, "loss": 0.6475, "step": 4760 }, { "epoch": 0.9192894381154663, "grad_norm": 0.6229172945022583, "learning_rate": 6.551173470564966e-05, "loss": 0.6004, "step": 4761 }, { "epoch": 0.9194825255840896, "grad_norm": 0.7177983522415161, "learning_rate": 6.549571480303615e-05, "loss": 0.612, "step": 4762 }, { "epoch": 0.9196756130527128, "grad_norm": 0.6820808053016663, "learning_rate": 6.54796931405017e-05, "loss": 0.6148, "step": 4763 }, { "epoch": 0.9198687005213362, "grad_norm": 0.7544858455657959, "learning_rate": 6.546366971986596e-05, "loss": 0.6449, "step": 4764 }, { "epoch": 0.9200617879899594, "grad_norm": 1.0186131000518799, "learning_rate": 6.544764454294878e-05, "loss": 0.6284, "step": 4765 }, { "epoch": 0.9202548754585828, "grad_norm": 1.1912658214569092, "learning_rate": 6.543161761157025e-05, "loss": 0.6423, "step": 4766 }, { "epoch": 0.920447962927206, "grad_norm": 0.8083844184875488, "learning_rate": 6.541558892755058e-05, "loss": 0.5811, "step": 4767 }, { "epoch": 0.9206410503958293, "grad_norm": 0.7346639037132263, "learning_rate": 6.539955849271025e-05, "loss": 0.6562, "step": 4768 }, { "epoch": 0.9208341378644526, "grad_norm": 0.9501969218254089, "learning_rate": 6.538352630886988e-05, "loss": 0.6263, "step": 4769 }, { "epoch": 0.9210272253330759, "grad_norm": 0.4926244914531708, "learning_rate": 6.536749237785036e-05, "loss": 0.6089, "step": 4770 }, { "epoch": 0.9212203128016991, "grad_norm": 0.5953173041343689, "learning_rate": 6.535145670147272e-05, "loss": 0.6228, "step": 4771 }, { "epoch": 0.9214134002703225, "grad_norm": 0.40004098415374756, "learning_rate": 6.533541928155821e-05, "loss": 0.5851, "step": 4772 }, { "epoch": 0.9216064877389457, "grad_norm": 1.1984134912490845, "learning_rate": 6.531938011992828e-05, "loss": 0.6047, "step": 4773 }, { "epoch": 0.921799575207569, "grad_norm": 3.615467071533203, "learning_rate": 6.530333921840455e-05, "loss": 0.69, "step": 4774 }, { "epoch": 0.9219926626761923, "grad_norm": 0.9216253757476807, "learning_rate": 6.528729657880889e-05, "loss": 0.669, "step": 4775 }, { "epoch": 0.9221857501448156, "grad_norm": 0.48817333579063416, "learning_rate": 6.527125220296331e-05, "loss": 0.7046, "step": 4776 }, { "epoch": 0.9223788376134389, "grad_norm": 0.7758028507232666, "learning_rate": 6.525520609269008e-05, "loss": 0.624, "step": 4777 }, { "epoch": 0.9225719250820622, "grad_norm": 1.6882319450378418, "learning_rate": 6.523915824981159e-05, "loss": 0.6625, "step": 4778 }, { "epoch": 0.9227650125506854, "grad_norm": 0.5567895770072937, "learning_rate": 6.522310867615049e-05, "loss": 0.533, "step": 4779 }, { "epoch": 0.9229581000193088, "grad_norm": 0.7079700231552124, "learning_rate": 6.520705737352961e-05, "loss": 0.6603, "step": 4780 }, { "epoch": 0.923151187487932, "grad_norm": 0.9313176870346069, "learning_rate": 6.519100434377197e-05, "loss": 0.6402, "step": 4781 }, { "epoch": 0.9233442749565554, "grad_norm": 1.6055926084518433, "learning_rate": 6.517494958870079e-05, "loss": 0.6241, "step": 4782 }, { "epoch": 0.9235373624251786, "grad_norm": 0.6738874316215515, "learning_rate": 6.515889311013948e-05, "loss": 0.6136, "step": 4783 }, { "epoch": 0.9237304498938019, "grad_norm": 0.9310223460197449, "learning_rate": 6.514283490991164e-05, "loss": 0.5988, "step": 4784 }, { "epoch": 0.9239235373624252, "grad_norm": 0.602460503578186, "learning_rate": 6.512677498984107e-05, "loss": 0.6163, "step": 4785 }, { "epoch": 0.9241166248310485, "grad_norm": 0.8284865021705627, "learning_rate": 6.51107133517518e-05, "loss": 0.6097, "step": 4786 }, { "epoch": 0.9243097122996717, "grad_norm": 0.7055022716522217, "learning_rate": 6.509464999746801e-05, "loss": 0.5939, "step": 4787 }, { "epoch": 0.9245027997682951, "grad_norm": 0.6605414152145386, "learning_rate": 6.507858492881409e-05, "loss": 0.6676, "step": 4788 }, { "epoch": 0.9246958872369183, "grad_norm": 0.5515486598014832, "learning_rate": 6.506251814761461e-05, "loss": 0.6748, "step": 4789 }, { "epoch": 0.9248889747055417, "grad_norm": 0.3982152044773102, "learning_rate": 6.504644965569437e-05, "loss": 0.6017, "step": 4790 }, { "epoch": 0.9250820621741649, "grad_norm": 0.6344628930091858, "learning_rate": 6.503037945487836e-05, "loss": 0.5693, "step": 4791 }, { "epoch": 0.9252751496427882, "grad_norm": 0.8265718817710876, "learning_rate": 6.501430754699168e-05, "loss": 0.6554, "step": 4792 }, { "epoch": 0.9254682371114115, "grad_norm": 0.7160733342170715, "learning_rate": 6.499823393385977e-05, "loss": 0.6449, "step": 4793 }, { "epoch": 0.9256613245800348, "grad_norm": 0.9000993967056274, "learning_rate": 6.498215861730814e-05, "loss": 0.6139, "step": 4794 }, { "epoch": 0.925854412048658, "grad_norm": 0.7573832869529724, "learning_rate": 6.496608159916257e-05, "loss": 0.6254, "step": 4795 }, { "epoch": 0.9260474995172814, "grad_norm": 3.265752077102661, "learning_rate": 6.495000288124896e-05, "loss": 0.6097, "step": 4796 }, { "epoch": 0.9262405869859046, "grad_norm": 0.9964376091957092, "learning_rate": 6.493392246539348e-05, "loss": 0.6301, "step": 4797 }, { "epoch": 0.926433674454528, "grad_norm": 4.168309688568115, "learning_rate": 6.491784035342246e-05, "loss": 0.6751, "step": 4798 }, { "epoch": 0.9266267619231512, "grad_norm": 1.3249537944793701, "learning_rate": 6.49017565471624e-05, "loss": 0.6605, "step": 4799 }, { "epoch": 0.9268198493917744, "grad_norm": 0.5634281039237976, "learning_rate": 6.488567104844001e-05, "loss": 0.5731, "step": 4800 }, { "epoch": 0.9270129368603978, "grad_norm": 0.6237615942955017, "learning_rate": 6.486958385908222e-05, "loss": 0.658, "step": 4801 }, { "epoch": 0.927206024329021, "grad_norm": 1.261174201965332, "learning_rate": 6.485349498091612e-05, "loss": 0.6641, "step": 4802 }, { "epoch": 0.9273991117976443, "grad_norm": 0.5932879447937012, "learning_rate": 6.483740441576898e-05, "loss": 0.6056, "step": 4803 }, { "epoch": 0.9275921992662676, "grad_norm": 0.8270384669303894, "learning_rate": 6.48213121654683e-05, "loss": 0.6611, "step": 4804 }, { "epoch": 0.9277852867348909, "grad_norm": 0.9421452879905701, "learning_rate": 6.480521823184176e-05, "loss": 0.6242, "step": 4805 }, { "epoch": 0.9279783742035141, "grad_norm": 1.83882737159729, "learning_rate": 6.47891226167172e-05, "loss": 0.5936, "step": 4806 }, { "epoch": 0.9281714616721375, "grad_norm": 0.7708373665809631, "learning_rate": 6.477302532192268e-05, "loss": 0.6423, "step": 4807 }, { "epoch": 0.9283645491407607, "grad_norm": 0.978569507598877, "learning_rate": 6.475692634928648e-05, "loss": 0.5648, "step": 4808 }, { "epoch": 0.9285576366093841, "grad_norm": 0.4620639681816101, "learning_rate": 6.474082570063698e-05, "loss": 0.5907, "step": 4809 }, { "epoch": 0.9287507240780073, "grad_norm": 0.7192492485046387, "learning_rate": 6.472472337780282e-05, "loss": 0.6461, "step": 4810 }, { "epoch": 0.9289438115466306, "grad_norm": 1.1550191640853882, "learning_rate": 6.470861938261286e-05, "loss": 0.6597, "step": 4811 }, { "epoch": 0.9291368990152539, "grad_norm": 0.5866570472717285, "learning_rate": 6.469251371689606e-05, "loss": 0.6862, "step": 4812 }, { "epoch": 0.9293299864838772, "grad_norm": 0.7910025715827942, "learning_rate": 6.467640638248162e-05, "loss": 0.6092, "step": 4813 }, { "epoch": 0.9295230739525004, "grad_norm": 0.547635555267334, "learning_rate": 6.466029738119895e-05, "loss": 0.6033, "step": 4814 }, { "epoch": 0.9297161614211238, "grad_norm": 0.5493137836456299, "learning_rate": 6.46441867148776e-05, "loss": 0.6456, "step": 4815 }, { "epoch": 0.929909248889747, "grad_norm": 1.0441820621490479, "learning_rate": 6.462807438534736e-05, "loss": 0.6019, "step": 4816 }, { "epoch": 0.9301023363583704, "grad_norm": 0.700988233089447, "learning_rate": 6.461196039443817e-05, "loss": 0.68, "step": 4817 }, { "epoch": 0.9302954238269936, "grad_norm": 0.6549000144004822, "learning_rate": 6.459584474398014e-05, "loss": 0.6822, "step": 4818 }, { "epoch": 0.9304885112956169, "grad_norm": 0.6649519801139832, "learning_rate": 6.457972743580366e-05, "loss": 0.6675, "step": 4819 }, { "epoch": 0.9306815987642402, "grad_norm": 1.053505301475525, "learning_rate": 6.45636084717392e-05, "loss": 0.6511, "step": 4820 }, { "epoch": 0.9308746862328635, "grad_norm": 0.5325350165367126, "learning_rate": 6.454748785361748e-05, "loss": 0.6029, "step": 4821 }, { "epoch": 0.9310677737014867, "grad_norm": 0.5647165179252625, "learning_rate": 6.453136558326941e-05, "loss": 0.6501, "step": 4822 }, { "epoch": 0.9312608611701101, "grad_norm": 0.7871409058570862, "learning_rate": 6.451524166252607e-05, "loss": 0.6199, "step": 4823 }, { "epoch": 0.9314539486387333, "grad_norm": 0.722640335559845, "learning_rate": 6.44991160932187e-05, "loss": 0.6436, "step": 4824 }, { "epoch": 0.9316470361073567, "grad_norm": 0.7258586883544922, "learning_rate": 6.448298887717877e-05, "loss": 0.6171, "step": 4825 }, { "epoch": 0.9318401235759799, "grad_norm": 0.6786913871765137, "learning_rate": 6.446686001623794e-05, "loss": 0.6421, "step": 4826 }, { "epoch": 0.9320332110446032, "grad_norm": 1.0217431783676147, "learning_rate": 6.445072951222803e-05, "loss": 0.6003, "step": 4827 }, { "epoch": 0.9322262985132265, "grad_norm": 0.5867282152175903, "learning_rate": 6.443459736698105e-05, "loss": 0.6276, "step": 4828 }, { "epoch": 0.9324193859818498, "grad_norm": 1.5313537120819092, "learning_rate": 6.441846358232921e-05, "loss": 0.6307, "step": 4829 }, { "epoch": 0.932612473450473, "grad_norm": 0.5658100843429565, "learning_rate": 6.440232816010491e-05, "loss": 0.5967, "step": 4830 }, { "epoch": 0.9328055609190964, "grad_norm": 0.4959971606731415, "learning_rate": 6.438619110214072e-05, "loss": 0.6423, "step": 4831 }, { "epoch": 0.9329986483877196, "grad_norm": 0.6899530291557312, "learning_rate": 6.437005241026937e-05, "loss": 0.62, "step": 4832 }, { "epoch": 0.933191735856343, "grad_norm": 2.059631586074829, "learning_rate": 6.435391208632384e-05, "loss": 0.6266, "step": 4833 }, { "epoch": 0.9333848233249662, "grad_norm": 0.6271259784698486, "learning_rate": 6.433777013213727e-05, "loss": 0.6135, "step": 4834 }, { "epoch": 0.9335779107935895, "grad_norm": 1.024789571762085, "learning_rate": 6.432162654954293e-05, "loss": 0.6409, "step": 4835 }, { "epoch": 0.9337709982622128, "grad_norm": 1.5584626197814941, "learning_rate": 6.430548134037438e-05, "loss": 0.6066, "step": 4836 }, { "epoch": 0.9339640857308361, "grad_norm": 0.76207435131073, "learning_rate": 6.428933450646529e-05, "loss": 0.634, "step": 4837 }, { "epoch": 0.9341571731994593, "grad_norm": 0.585892379283905, "learning_rate": 6.42731860496495e-05, "loss": 0.6896, "step": 4838 }, { "epoch": 0.9343502606680827, "grad_norm": 0.5609655976295471, "learning_rate": 6.425703597176108e-05, "loss": 0.6645, "step": 4839 }, { "epoch": 0.9345433481367059, "grad_norm": 0.7209137678146362, "learning_rate": 6.42408842746343e-05, "loss": 0.6079, "step": 4840 }, { "epoch": 0.9347364356053293, "grad_norm": 1.2983510494232178, "learning_rate": 6.422473096010358e-05, "loss": 0.5532, "step": 4841 }, { "epoch": 0.9349295230739525, "grad_norm": 0.9299805760383606, "learning_rate": 6.420857603000347e-05, "loss": 0.6605, "step": 4842 }, { "epoch": 0.9351226105425758, "grad_norm": 1.261780858039856, "learning_rate": 6.419241948616884e-05, "loss": 0.643, "step": 4843 }, { "epoch": 0.9353156980111991, "grad_norm": 1.2555617094039917, "learning_rate": 6.417626133043459e-05, "loss": 0.6553, "step": 4844 }, { "epoch": 0.9355087854798224, "grad_norm": 0.8511378169059753, "learning_rate": 6.416010156463593e-05, "loss": 0.6649, "step": 4845 }, { "epoch": 0.9357018729484456, "grad_norm": 1.5680063962936401, "learning_rate": 6.414394019060819e-05, "loss": 0.6341, "step": 4846 }, { "epoch": 0.9358949604170689, "grad_norm": 0.7323397397994995, "learning_rate": 6.412777721018687e-05, "loss": 0.6196, "step": 4847 }, { "epoch": 0.9360880478856922, "grad_norm": 0.9430294632911682, "learning_rate": 6.411161262520771e-05, "loss": 0.66, "step": 4848 }, { "epoch": 0.9362811353543155, "grad_norm": 1.2058351039886475, "learning_rate": 6.409544643750658e-05, "loss": 0.6301, "step": 4849 }, { "epoch": 0.9364742228229388, "grad_norm": 0.8159075975418091, "learning_rate": 6.407927864891952e-05, "loss": 0.7085, "step": 4850 }, { "epoch": 0.936667310291562, "grad_norm": 0.7201288342475891, "learning_rate": 6.406310926128286e-05, "loss": 0.633, "step": 4851 }, { "epoch": 0.9368603977601854, "grad_norm": 1.1732721328735352, "learning_rate": 6.404693827643294e-05, "loss": 0.7206, "step": 4852 }, { "epoch": 0.9370534852288086, "grad_norm": 0.8583370447158813, "learning_rate": 6.403076569620643e-05, "loss": 0.6104, "step": 4853 }, { "epoch": 0.937246572697432, "grad_norm": 0.6903106570243835, "learning_rate": 6.401459152244012e-05, "loss": 0.671, "step": 4854 }, { "epoch": 0.9374396601660552, "grad_norm": 0.762135922908783, "learning_rate": 6.399841575697098e-05, "loss": 0.6389, "step": 4855 }, { "epoch": 0.9376327476346785, "grad_norm": 1.8000051975250244, "learning_rate": 6.398223840163617e-05, "loss": 0.6742, "step": 4856 }, { "epoch": 0.9378258351033018, "grad_norm": 0.6325253248214722, "learning_rate": 6.396605945827303e-05, "loss": 0.6645, "step": 4857 }, { "epoch": 0.9380189225719251, "grad_norm": 1.5053006410598755, "learning_rate": 6.394987892871908e-05, "loss": 0.653, "step": 4858 }, { "epoch": 0.9382120100405483, "grad_norm": 0.6970574259757996, "learning_rate": 6.3933696814812e-05, "loss": 0.5654, "step": 4859 }, { "epoch": 0.9384050975091717, "grad_norm": 0.9356146454811096, "learning_rate": 6.391751311838969e-05, "loss": 0.6701, "step": 4860 }, { "epoch": 0.9385981849777949, "grad_norm": 0.7593804597854614, "learning_rate": 6.390132784129018e-05, "loss": 0.6191, "step": 4861 }, { "epoch": 0.9387912724464182, "grad_norm": 0.6035736203193665, "learning_rate": 6.388514098535176e-05, "loss": 0.6482, "step": 4862 }, { "epoch": 0.9389843599150415, "grad_norm": 1.0875885486602783, "learning_rate": 6.38689525524128e-05, "loss": 0.7447, "step": 4863 }, { "epoch": 0.9391774473836648, "grad_norm": 0.8019682168960571, "learning_rate": 6.38527625443119e-05, "loss": 0.6297, "step": 4864 }, { "epoch": 0.939370534852288, "grad_norm": 1.0868767499923706, "learning_rate": 6.383657096288787e-05, "loss": 0.7031, "step": 4865 }, { "epoch": 0.9395636223209114, "grad_norm": 0.5537790060043335, "learning_rate": 6.382037780997963e-05, "loss": 0.6597, "step": 4866 }, { "epoch": 0.9397567097895346, "grad_norm": 0.7462095618247986, "learning_rate": 6.380418308742631e-05, "loss": 0.5993, "step": 4867 }, { "epoch": 0.939949797258158, "grad_norm": 0.7065976858139038, "learning_rate": 6.378798679706723e-05, "loss": 0.6409, "step": 4868 }, { "epoch": 0.9401428847267812, "grad_norm": 1.7113165855407715, "learning_rate": 6.377178894074192e-05, "loss": 0.6714, "step": 4869 }, { "epoch": 0.9403359721954045, "grad_norm": 0.870890736579895, "learning_rate": 6.375558952028997e-05, "loss": 0.6227, "step": 4870 }, { "epoch": 0.9405290596640278, "grad_norm": 0.7746900916099548, "learning_rate": 6.373938853755126e-05, "loss": 0.652, "step": 4871 }, { "epoch": 0.9407221471326511, "grad_norm": 0.9241239428520203, "learning_rate": 6.372318599436584e-05, "loss": 0.6284, "step": 4872 }, { "epoch": 0.9409152346012744, "grad_norm": 0.935937225818634, "learning_rate": 6.370698189257387e-05, "loss": 0.6336, "step": 4873 }, { "epoch": 0.9411083220698977, "grad_norm": 0.6092314124107361, "learning_rate": 6.369077623401574e-05, "loss": 0.7047, "step": 4874 }, { "epoch": 0.9413014095385209, "grad_norm": 0.6177489161491394, "learning_rate": 6.367456902053199e-05, "loss": 0.636, "step": 4875 }, { "epoch": 0.9414944970071443, "grad_norm": 0.5584657192230225, "learning_rate": 6.365836025396337e-05, "loss": 0.6643, "step": 4876 }, { "epoch": 0.9416875844757675, "grad_norm": 0.5882404446601868, "learning_rate": 6.364214993615079e-05, "loss": 0.6311, "step": 4877 }, { "epoch": 0.9418806719443908, "grad_norm": 0.8276442289352417, "learning_rate": 6.362593806893529e-05, "loss": 0.6266, "step": 4878 }, { "epoch": 0.9420737594130141, "grad_norm": 0.7047929763793945, "learning_rate": 6.360972465415817e-05, "loss": 0.5995, "step": 4879 }, { "epoch": 0.9422668468816374, "grad_norm": 1.2341630458831787, "learning_rate": 6.359350969366085e-05, "loss": 0.6372, "step": 4880 }, { "epoch": 0.9424599343502607, "grad_norm": 0.4987041652202606, "learning_rate": 6.357729318928492e-05, "loss": 0.6485, "step": 4881 }, { "epoch": 0.942653021818884, "grad_norm": 0.5146585702896118, "learning_rate": 6.35610751428722e-05, "loss": 0.7167, "step": 4882 }, { "epoch": 0.9428461092875072, "grad_norm": 0.5016558170318604, "learning_rate": 6.354485555626463e-05, "loss": 0.6741, "step": 4883 }, { "epoch": 0.9430391967561306, "grad_norm": 0.7618740797042847, "learning_rate": 6.352863443130433e-05, "loss": 0.5812, "step": 4884 }, { "epoch": 0.9432322842247538, "grad_norm": 0.8907603025436401, "learning_rate": 6.351241176983364e-05, "loss": 0.6515, "step": 4885 }, { "epoch": 0.9434253716933771, "grad_norm": 0.5471984148025513, "learning_rate": 6.349618757369502e-05, "loss": 0.6412, "step": 4886 }, { "epoch": 0.9436184591620004, "grad_norm": 0.6254308223724365, "learning_rate": 6.347996184473115e-05, "loss": 0.639, "step": 4887 }, { "epoch": 0.9438115466306237, "grad_norm": 0.4472064971923828, "learning_rate": 6.346373458478483e-05, "loss": 0.6055, "step": 4888 }, { "epoch": 0.944004634099247, "grad_norm": 1.4573874473571777, "learning_rate": 6.34475057956991e-05, "loss": 0.6602, "step": 4889 }, { "epoch": 0.9441977215678703, "grad_norm": 0.8663402795791626, "learning_rate": 6.343127547931712e-05, "loss": 0.6332, "step": 4890 }, { "epoch": 0.9443908090364935, "grad_norm": 0.527857780456543, "learning_rate": 6.341504363748225e-05, "loss": 0.5638, "step": 4891 }, { "epoch": 0.9445838965051169, "grad_norm": 0.676457941532135, "learning_rate": 6.3398810272038e-05, "loss": 0.6532, "step": 4892 }, { "epoch": 0.9447769839737401, "grad_norm": 1.430505394935608, "learning_rate": 6.33825753848281e-05, "loss": 0.6341, "step": 4893 }, { "epoch": 0.9449700714423633, "grad_norm": 0.6682031750679016, "learning_rate": 6.33663389776964e-05, "loss": 0.6543, "step": 4894 }, { "epoch": 0.9451631589109867, "grad_norm": 0.8447955846786499, "learning_rate": 6.335010105248694e-05, "loss": 0.607, "step": 4895 }, { "epoch": 0.9453562463796099, "grad_norm": 1.1479833126068115, "learning_rate": 6.333386161104396e-05, "loss": 0.6659, "step": 4896 }, { "epoch": 0.9455493338482333, "grad_norm": 0.5162709355354309, "learning_rate": 6.331762065521184e-05, "loss": 0.6955, "step": 4897 }, { "epoch": 0.9457424213168565, "grad_norm": 0.6581130623817444, "learning_rate": 6.330137818683513e-05, "loss": 0.5887, "step": 4898 }, { "epoch": 0.9459355087854798, "grad_norm": 1.3187711238861084, "learning_rate": 6.328513420775856e-05, "loss": 0.6269, "step": 4899 }, { "epoch": 0.9461285962541031, "grad_norm": 0.7514766454696655, "learning_rate": 6.326888871982705e-05, "loss": 0.6658, "step": 4900 }, { "epoch": 0.9463216837227264, "grad_norm": 0.7644774913787842, "learning_rate": 6.32526417248857e-05, "loss": 0.6196, "step": 4901 }, { "epoch": 0.9465147711913496, "grad_norm": 1.4700286388397217, "learning_rate": 6.32363932247797e-05, "loss": 0.6613, "step": 4902 }, { "epoch": 0.946707858659973, "grad_norm": 0.9842969179153442, "learning_rate": 6.322014322135452e-05, "loss": 0.5776, "step": 4903 }, { "epoch": 0.9469009461285962, "grad_norm": 1.584343671798706, "learning_rate": 6.32038917164557e-05, "loss": 0.6524, "step": 4904 }, { "epoch": 0.9470940335972196, "grad_norm": 0.7207793593406677, "learning_rate": 6.318763871192905e-05, "loss": 0.6302, "step": 4905 }, { "epoch": 0.9472871210658428, "grad_norm": 0.6645573377609253, "learning_rate": 6.317138420962048e-05, "loss": 0.6179, "step": 4906 }, { "epoch": 0.9474802085344661, "grad_norm": 1.0651065111160278, "learning_rate": 6.315512821137606e-05, "loss": 0.6491, "step": 4907 }, { "epoch": 0.9476732960030894, "grad_norm": 0.5076994299888611, "learning_rate": 6.31388707190421e-05, "loss": 0.648, "step": 4908 }, { "epoch": 0.9478663834717127, "grad_norm": 0.6591524481773376, "learning_rate": 6.312261173446504e-05, "loss": 0.653, "step": 4909 }, { "epoch": 0.9480594709403359, "grad_norm": 0.7312628030776978, "learning_rate": 6.310635125949145e-05, "loss": 0.6067, "step": 4910 }, { "epoch": 0.9482525584089593, "grad_norm": 0.7072637677192688, "learning_rate": 6.309008929596816e-05, "loss": 0.6339, "step": 4911 }, { "epoch": 0.9484456458775825, "grad_norm": 0.8097092509269714, "learning_rate": 6.307382584574209e-05, "loss": 0.6379, "step": 4912 }, { "epoch": 0.9486387333462059, "grad_norm": 0.8148403167724609, "learning_rate": 6.305756091066033e-05, "loss": 0.6373, "step": 4913 }, { "epoch": 0.9488318208148291, "grad_norm": 1.1057246923446655, "learning_rate": 6.304129449257022e-05, "loss": 0.635, "step": 4914 }, { "epoch": 0.9490249082834524, "grad_norm": 1.7384101152420044, "learning_rate": 6.302502659331917e-05, "loss": 0.6923, "step": 4915 }, { "epoch": 0.9492179957520757, "grad_norm": 0.6786219477653503, "learning_rate": 6.300875721475481e-05, "loss": 0.5993, "step": 4916 }, { "epoch": 0.949411083220699, "grad_norm": 0.8135479092597961, "learning_rate": 6.299248635872495e-05, "loss": 0.68, "step": 4917 }, { "epoch": 0.9496041706893222, "grad_norm": 2.7911202907562256, "learning_rate": 6.297621402707754e-05, "loss": 0.5982, "step": 4918 }, { "epoch": 0.9497972581579456, "grad_norm": 1.0440798997879028, "learning_rate": 6.295994022166068e-05, "loss": 0.6072, "step": 4919 }, { "epoch": 0.9499903456265688, "grad_norm": 0.5690823793411255, "learning_rate": 6.29436649443227e-05, "loss": 0.6526, "step": 4920 }, { "epoch": 0.9501834330951922, "grad_norm": 0.5746780633926392, "learning_rate": 6.292738819691202e-05, "loss": 0.6699, "step": 4921 }, { "epoch": 0.9503765205638154, "grad_norm": 0.7736198306083679, "learning_rate": 6.291110998127731e-05, "loss": 0.5834, "step": 4922 }, { "epoch": 0.9505696080324387, "grad_norm": 0.6776920557022095, "learning_rate": 6.289483029926733e-05, "loss": 0.6687, "step": 4923 }, { "epoch": 0.950762695501062, "grad_norm": 0.7889087796211243, "learning_rate": 6.287854915273104e-05, "loss": 0.5667, "step": 4924 }, { "epoch": 0.9509557829696853, "grad_norm": 1.1469900608062744, "learning_rate": 6.286226654351759e-05, "loss": 0.65, "step": 4925 }, { "epoch": 0.9511488704383085, "grad_norm": 0.6769804358482361, "learning_rate": 6.284598247347626e-05, "loss": 0.5969, "step": 4926 }, { "epoch": 0.9513419579069319, "grad_norm": 0.7303265929222107, "learning_rate": 6.28296969444565e-05, "loss": 0.6261, "step": 4927 }, { "epoch": 0.9515350453755551, "grad_norm": 1.3576974868774414, "learning_rate": 6.281340995830793e-05, "loss": 0.6793, "step": 4928 }, { "epoch": 0.9517281328441785, "grad_norm": 0.8031749129295349, "learning_rate": 6.279712151688038e-05, "loss": 0.6336, "step": 4929 }, { "epoch": 0.9519212203128017, "grad_norm": 0.7721862196922302, "learning_rate": 6.278083162202375e-05, "loss": 0.643, "step": 4930 }, { "epoch": 0.952114307781425, "grad_norm": 0.8901581168174744, "learning_rate": 6.276454027558817e-05, "loss": 0.6721, "step": 4931 }, { "epoch": 0.9523073952500483, "grad_norm": 1.155354380607605, "learning_rate": 6.274824747942396e-05, "loss": 0.6573, "step": 4932 }, { "epoch": 0.9525004827186716, "grad_norm": 0.8206495046615601, "learning_rate": 6.273195323538155e-05, "loss": 0.612, "step": 4933 }, { "epoch": 0.9526935701872948, "grad_norm": 0.7173230051994324, "learning_rate": 6.271565754531155e-05, "loss": 0.6124, "step": 4934 }, { "epoch": 0.9528866576559182, "grad_norm": 0.7100188732147217, "learning_rate": 6.269936041106474e-05, "loss": 0.64, "step": 4935 }, { "epoch": 0.9530797451245414, "grad_norm": 0.880888044834137, "learning_rate": 6.268306183449206e-05, "loss": 0.7351, "step": 4936 }, { "epoch": 0.9532728325931648, "grad_norm": 0.5764870047569275, "learning_rate": 6.266676181744462e-05, "loss": 0.5723, "step": 4937 }, { "epoch": 0.953465920061788, "grad_norm": 0.7114074230194092, "learning_rate": 6.265046036177366e-05, "loss": 0.6903, "step": 4938 }, { "epoch": 0.9536590075304113, "grad_norm": 1.024485468864441, "learning_rate": 6.263415746933066e-05, "loss": 0.6137, "step": 4939 }, { "epoch": 0.9538520949990346, "grad_norm": 0.9974509477615356, "learning_rate": 6.261785314196722e-05, "loss": 0.6298, "step": 4940 }, { "epoch": 0.9540451824676579, "grad_norm": 0.6170519590377808, "learning_rate": 6.260154738153503e-05, "loss": 0.6738, "step": 4941 }, { "epoch": 0.9542382699362811, "grad_norm": 0.6121035218238831, "learning_rate": 6.258524018988607e-05, "loss": 0.5808, "step": 4942 }, { "epoch": 0.9544313574049044, "grad_norm": 0.7769229412078857, "learning_rate": 6.256893156887242e-05, "loss": 0.616, "step": 4943 }, { "epoch": 0.9546244448735277, "grad_norm": 0.6250663995742798, "learning_rate": 6.25526215203463e-05, "loss": 0.6548, "step": 4944 }, { "epoch": 0.9548175323421509, "grad_norm": 0.8071849346160889, "learning_rate": 6.253631004616014e-05, "loss": 0.6951, "step": 4945 }, { "epoch": 0.9550106198107743, "grad_norm": 6.10567045211792, "learning_rate": 6.251999714816652e-05, "loss": 0.6914, "step": 4946 }, { "epoch": 0.9552037072793975, "grad_norm": 1.0403647422790527, "learning_rate": 6.250368282821814e-05, "loss": 0.6632, "step": 4947 }, { "epoch": 0.9553967947480209, "grad_norm": 0.7810744643211365, "learning_rate": 6.248736708816793e-05, "loss": 0.6142, "step": 4948 }, { "epoch": 0.9555898822166441, "grad_norm": 1.0096209049224854, "learning_rate": 6.24710499298689e-05, "loss": 0.5937, "step": 4949 }, { "epoch": 0.9557829696852674, "grad_norm": 2.639371395111084, "learning_rate": 6.245473135517432e-05, "loss": 0.5861, "step": 4950 }, { "epoch": 0.9559760571538907, "grad_norm": 0.5932890176773071, "learning_rate": 6.243841136593752e-05, "loss": 0.5841, "step": 4951 }, { "epoch": 0.956169144622514, "grad_norm": 0.8986606001853943, "learning_rate": 6.242208996401206e-05, "loss": 0.531, "step": 4952 }, { "epoch": 0.9563622320911372, "grad_norm": 2.166045665740967, "learning_rate": 6.240576715125163e-05, "loss": 0.6919, "step": 4953 }, { "epoch": 0.9565553195597606, "grad_norm": 0.9323033094406128, "learning_rate": 6.238944292951011e-05, "loss": 0.6933, "step": 4954 }, { "epoch": 0.9567484070283838, "grad_norm": 0.7580961585044861, "learning_rate": 6.23731173006415e-05, "loss": 0.6582, "step": 4955 }, { "epoch": 0.9569414944970072, "grad_norm": 3.18131422996521, "learning_rate": 6.235679026649997e-05, "loss": 0.6184, "step": 4956 }, { "epoch": 0.9571345819656304, "grad_norm": 0.6323263645172119, "learning_rate": 6.234046182893988e-05, "loss": 0.6065, "step": 4957 }, { "epoch": 0.9573276694342537, "grad_norm": 0.6356973052024841, "learning_rate": 6.23241319898157e-05, "loss": 0.6168, "step": 4958 }, { "epoch": 0.957520756902877, "grad_norm": 0.7873768210411072, "learning_rate": 6.230780075098211e-05, "loss": 0.6401, "step": 4959 }, { "epoch": 0.9577138443715003, "grad_norm": 1.1240898370742798, "learning_rate": 6.229146811429391e-05, "loss": 0.6492, "step": 4960 }, { "epoch": 0.9579069318401235, "grad_norm": 0.8622150421142578, "learning_rate": 6.227513408160608e-05, "loss": 0.5879, "step": 4961 }, { "epoch": 0.9581000193087469, "grad_norm": 0.954438328742981, "learning_rate": 6.225879865477377e-05, "loss": 0.6388, "step": 4962 }, { "epoch": 0.9582931067773701, "grad_norm": 1.2238973379135132, "learning_rate": 6.224246183565225e-05, "loss": 0.6189, "step": 4963 }, { "epoch": 0.9584861942459935, "grad_norm": 0.7478033304214478, "learning_rate": 6.222612362609696e-05, "loss": 0.6494, "step": 4964 }, { "epoch": 0.9586792817146167, "grad_norm": 0.6067814826965332, "learning_rate": 6.220978402796352e-05, "loss": 0.5833, "step": 4965 }, { "epoch": 0.95887236918324, "grad_norm": 2.0937442779541016, "learning_rate": 6.219344304310771e-05, "loss": 0.621, "step": 4966 }, { "epoch": 0.9590654566518633, "grad_norm": 1.1522780656814575, "learning_rate": 6.217710067338542e-05, "loss": 0.5989, "step": 4967 }, { "epoch": 0.9592585441204866, "grad_norm": 0.8181199431419373, "learning_rate": 6.216075692065276e-05, "loss": 0.689, "step": 4968 }, { "epoch": 0.9594516315891098, "grad_norm": 1.3889575004577637, "learning_rate": 6.214441178676596e-05, "loss": 0.6708, "step": 4969 }, { "epoch": 0.9596447190577332, "grad_norm": 0.583719789981842, "learning_rate": 6.212806527358139e-05, "loss": 0.6236, "step": 4970 }, { "epoch": 0.9598378065263564, "grad_norm": 0.9152575731277466, "learning_rate": 6.211171738295563e-05, "loss": 0.6309, "step": 4971 }, { "epoch": 0.9600308939949798, "grad_norm": 0.5666991472244263, "learning_rate": 6.209536811674537e-05, "loss": 0.6189, "step": 4972 }, { "epoch": 0.960223981463603, "grad_norm": 3.773449420928955, "learning_rate": 6.207901747680746e-05, "loss": 0.6277, "step": 4973 }, { "epoch": 0.9604170689322263, "grad_norm": 0.9815205335617065, "learning_rate": 6.206266546499896e-05, "loss": 0.6872, "step": 4974 }, { "epoch": 0.9606101564008496, "grad_norm": 0.8018364310264587, "learning_rate": 6.2046312083177e-05, "loss": 0.7158, "step": 4975 }, { "epoch": 0.9608032438694729, "grad_norm": 1.1922768354415894, "learning_rate": 6.202995733319895e-05, "loss": 0.6304, "step": 4976 }, { "epoch": 0.9609963313380961, "grad_norm": 0.856354832649231, "learning_rate": 6.201360121692225e-05, "loss": 0.6737, "step": 4977 }, { "epoch": 0.9611894188067195, "grad_norm": 0.6936930418014526, "learning_rate": 6.19972437362046e-05, "loss": 0.6538, "step": 4978 }, { "epoch": 0.9613825062753427, "grad_norm": 4.225619316101074, "learning_rate": 6.198088489290373e-05, "loss": 0.5961, "step": 4979 }, { "epoch": 0.9615755937439661, "grad_norm": 0.6090471148490906, "learning_rate": 6.196452468887764e-05, "loss": 0.6154, "step": 4980 }, { "epoch": 0.9617686812125893, "grad_norm": 0.570325493812561, "learning_rate": 6.194816312598441e-05, "loss": 0.606, "step": 4981 }, { "epoch": 0.9619617686812126, "grad_norm": 0.7819868326187134, "learning_rate": 6.19318002060823e-05, "loss": 0.6038, "step": 4982 }, { "epoch": 0.9621548561498359, "grad_norm": 0.9459628462791443, "learning_rate": 6.191543593102975e-05, "loss": 0.627, "step": 4983 }, { "epoch": 0.9623479436184592, "grad_norm": 1.1399022340774536, "learning_rate": 6.189907030268529e-05, "loss": 0.6531, "step": 4984 }, { "epoch": 0.9625410310870824, "grad_norm": 0.8720825910568237, "learning_rate": 6.188270332290766e-05, "loss": 0.6456, "step": 4985 }, { "epoch": 0.9627341185557058, "grad_norm": 0.9214797019958496, "learning_rate": 6.186633499355576e-05, "loss": 0.6472, "step": 4986 }, { "epoch": 0.962927206024329, "grad_norm": 0.8494054675102234, "learning_rate": 6.184996531648855e-05, "loss": 0.5827, "step": 4987 }, { "epoch": 0.9631202934929524, "grad_norm": 0.8738096952438354, "learning_rate": 6.183359429356528e-05, "loss": 0.6776, "step": 4988 }, { "epoch": 0.9633133809615756, "grad_norm": 1.1333720684051514, "learning_rate": 6.181722192664525e-05, "loss": 0.6571, "step": 4989 }, { "epoch": 0.9635064684301988, "grad_norm": 1.5637699365615845, "learning_rate": 6.180084821758795e-05, "loss": 0.7213, "step": 4990 }, { "epoch": 0.9636995558988222, "grad_norm": 0.7263832092285156, "learning_rate": 6.178447316825302e-05, "loss": 0.6378, "step": 4991 }, { "epoch": 0.9638926433674454, "grad_norm": 0.6865978837013245, "learning_rate": 6.176809678050026e-05, "loss": 0.6625, "step": 4992 }, { "epoch": 0.9640857308360687, "grad_norm": 0.7311380505561829, "learning_rate": 6.17517190561896e-05, "loss": 0.6779, "step": 4993 }, { "epoch": 0.964278818304692, "grad_norm": 0.8494914174079895, "learning_rate": 6.173533999718114e-05, "loss": 0.6663, "step": 4994 }, { "epoch": 0.9644719057733153, "grad_norm": 2.1803650856018066, "learning_rate": 6.171895960533514e-05, "loss": 0.6377, "step": 4995 }, { "epoch": 0.9646649932419386, "grad_norm": 0.8294605016708374, "learning_rate": 6.170257788251197e-05, "loss": 0.6379, "step": 4996 }, { "epoch": 0.9648580807105619, "grad_norm": 1.1303222179412842, "learning_rate": 6.16861948305722e-05, "loss": 0.6104, "step": 4997 }, { "epoch": 0.9650511681791851, "grad_norm": 1.3735084533691406, "learning_rate": 6.166981045137651e-05, "loss": 0.6354, "step": 4998 }, { "epoch": 0.9652442556478085, "grad_norm": 1.163767695426941, "learning_rate": 6.165342474678579e-05, "loss": 0.5881, "step": 4999 }, { "epoch": 0.9654373431164317, "grad_norm": 1.6716928482055664, "learning_rate": 6.163703771866099e-05, "loss": 0.645, "step": 5000 }, { "epoch": 0.9654373431164317, "eval_loss": 0.6761316657066345, "eval_runtime": 49.6701, "eval_samples_per_second": 13.368, "eval_steps_per_second": 0.423, "step": 5000 }, { "epoch": 0.965630430585055, "grad_norm": 0.8248615860939026, "learning_rate": 6.16206493688633e-05, "loss": 0.5499, "step": 5001 }, { "epoch": 0.9658235180536783, "grad_norm": 0.97115159034729, "learning_rate": 6.160425969925399e-05, "loss": 0.6366, "step": 5002 }, { "epoch": 0.9660166055223016, "grad_norm": 1.116755485534668, "learning_rate": 6.158786871169457e-05, "loss": 0.6465, "step": 5003 }, { "epoch": 0.9662096929909249, "grad_norm": 1.0113158226013184, "learning_rate": 6.157147640804657e-05, "loss": 0.6132, "step": 5004 }, { "epoch": 0.9664027804595482, "grad_norm": 1.374178171157837, "learning_rate": 6.155508279017177e-05, "loss": 0.6456, "step": 5005 }, { "epoch": 0.9665958679281714, "grad_norm": 0.6869686841964722, "learning_rate": 6.153868785993209e-05, "loss": 0.6791, "step": 5006 }, { "epoch": 0.9667889553967948, "grad_norm": 1.0831750631332397, "learning_rate": 6.152229161918957e-05, "loss": 0.6014, "step": 5007 }, { "epoch": 0.966982042865418, "grad_norm": 0.9022350907325745, "learning_rate": 6.150589406980638e-05, "loss": 0.6718, "step": 5008 }, { "epoch": 0.9671751303340413, "grad_norm": 0.8265557289123535, "learning_rate": 6.148949521364489e-05, "loss": 0.6014, "step": 5009 }, { "epoch": 0.9673682178026646, "grad_norm": 1.0448250770568848, "learning_rate": 6.14730950525676e-05, "loss": 0.6065, "step": 5010 }, { "epoch": 0.9675613052712879, "grad_norm": 0.9020401835441589, "learning_rate": 6.145669358843713e-05, "loss": 0.701, "step": 5011 }, { "epoch": 0.9677543927399112, "grad_norm": 0.8409692645072937, "learning_rate": 6.14402908231163e-05, "loss": 0.61, "step": 5012 }, { "epoch": 0.9679474802085345, "grad_norm": 1.9574450254440308, "learning_rate": 6.142388675846802e-05, "loss": 0.5726, "step": 5013 }, { "epoch": 0.9681405676771577, "grad_norm": 1.6064599752426147, "learning_rate": 6.14074813963554e-05, "loss": 0.6402, "step": 5014 }, { "epoch": 0.9683336551457811, "grad_norm": 1.1755849123001099, "learning_rate": 6.139107473864166e-05, "loss": 0.6326, "step": 5015 }, { "epoch": 0.9685267426144043, "grad_norm": 0.8633824586868286, "learning_rate": 6.137466678719019e-05, "loss": 0.6414, "step": 5016 }, { "epoch": 0.9687198300830276, "grad_norm": 0.7768634557723999, "learning_rate": 6.135825754386451e-05, "loss": 0.6069, "step": 5017 }, { "epoch": 0.9689129175516509, "grad_norm": 0.9140563011169434, "learning_rate": 6.134184701052829e-05, "loss": 0.6637, "step": 5018 }, { "epoch": 0.9691060050202742, "grad_norm": 0.707225501537323, "learning_rate": 6.132543518904536e-05, "loss": 0.5436, "step": 5019 }, { "epoch": 0.9692990924888975, "grad_norm": 0.6692696809768677, "learning_rate": 6.130902208127971e-05, "loss": 0.615, "step": 5020 }, { "epoch": 0.9694921799575208, "grad_norm": 0.8267423510551453, "learning_rate": 6.129260768909541e-05, "loss": 0.6577, "step": 5021 }, { "epoch": 0.969685267426144, "grad_norm": 2.9511144161224365, "learning_rate": 6.127619201435674e-05, "loss": 0.6842, "step": 5022 }, { "epoch": 0.9698783548947674, "grad_norm": 0.8049082159996033, "learning_rate": 6.125977505892812e-05, "loss": 0.6595, "step": 5023 }, { "epoch": 0.9700714423633906, "grad_norm": 0.819695234298706, "learning_rate": 6.124335682467407e-05, "loss": 0.6561, "step": 5024 }, { "epoch": 0.9702645298320139, "grad_norm": 0.6019622683525085, "learning_rate": 6.12269373134593e-05, "loss": 0.6272, "step": 5025 }, { "epoch": 0.9704576173006372, "grad_norm": 1.1608558893203735, "learning_rate": 6.121051652714867e-05, "loss": 0.6189, "step": 5026 }, { "epoch": 0.9706507047692605, "grad_norm": 0.9054451584815979, "learning_rate": 6.119409446760713e-05, "loss": 0.6091, "step": 5027 }, { "epoch": 0.9708437922378838, "grad_norm": 0.7479997277259827, "learning_rate": 6.117767113669982e-05, "loss": 0.5699, "step": 5028 }, { "epoch": 0.9710368797065071, "grad_norm": 1.1027768850326538, "learning_rate": 6.116124653629205e-05, "loss": 0.6373, "step": 5029 }, { "epoch": 0.9712299671751303, "grad_norm": 0.8097690939903259, "learning_rate": 6.114482066824919e-05, "loss": 0.5483, "step": 5030 }, { "epoch": 0.9714230546437537, "grad_norm": 1.1266589164733887, "learning_rate": 6.112839353443684e-05, "loss": 0.6716, "step": 5031 }, { "epoch": 0.9716161421123769, "grad_norm": 0.9097194075584412, "learning_rate": 6.111196513672067e-05, "loss": 0.6501, "step": 5032 }, { "epoch": 0.9718092295810002, "grad_norm": 1.33905029296875, "learning_rate": 6.109553547696656e-05, "loss": 0.6405, "step": 5033 }, { "epoch": 0.9720023170496235, "grad_norm": 1.616915225982666, "learning_rate": 6.107910455704049e-05, "loss": 0.6402, "step": 5034 }, { "epoch": 0.9721954045182468, "grad_norm": 0.8821542263031006, "learning_rate": 6.106267237880863e-05, "loss": 0.6125, "step": 5035 }, { "epoch": 0.97238849198687, "grad_norm": 0.6747083067893982, "learning_rate": 6.104623894413719e-05, "loss": 0.5783, "step": 5036 }, { "epoch": 0.9725815794554933, "grad_norm": 1.2035056352615356, "learning_rate": 6.1029804254892645e-05, "loss": 0.6268, "step": 5037 }, { "epoch": 0.9727746669241166, "grad_norm": 0.7305690050125122, "learning_rate": 6.1013368312941554e-05, "loss": 0.6534, "step": 5038 }, { "epoch": 0.9729677543927399, "grad_norm": 1.888732671737671, "learning_rate": 6.0996931120150624e-05, "loss": 0.6283, "step": 5039 }, { "epoch": 0.9731608418613632, "grad_norm": 1.3552196025848389, "learning_rate": 6.098049267838669e-05, "loss": 0.622, "step": 5040 }, { "epoch": 0.9733539293299864, "grad_norm": 0.6395645141601562, "learning_rate": 6.096405298951674e-05, "loss": 0.6333, "step": 5041 }, { "epoch": 0.9735470167986098, "grad_norm": 0.7325239181518555, "learning_rate": 6.0947612055407934e-05, "loss": 0.6259, "step": 5042 }, { "epoch": 0.973740104267233, "grad_norm": 0.6775615811347961, "learning_rate": 6.093116987792752e-05, "loss": 0.6048, "step": 5043 }, { "epoch": 0.9739331917358564, "grad_norm": 0.8970676064491272, "learning_rate": 6.0914726458942914e-05, "loss": 0.592, "step": 5044 }, { "epoch": 0.9741262792044796, "grad_norm": 1.0635488033294678, "learning_rate": 6.089828180032169e-05, "loss": 0.62, "step": 5045 }, { "epoch": 0.9743193666731029, "grad_norm": 0.8840670585632324, "learning_rate": 6.088183590393154e-05, "loss": 0.637, "step": 5046 }, { "epoch": 0.9745124541417262, "grad_norm": 0.7426815032958984, "learning_rate": 6.0865388771640275e-05, "loss": 0.6848, "step": 5047 }, { "epoch": 0.9747055416103495, "grad_norm": 3.16938853263855, "learning_rate": 6.08489404053159e-05, "loss": 0.6817, "step": 5048 }, { "epoch": 0.9748986290789727, "grad_norm": 2.0505969524383545, "learning_rate": 6.0832490806826546e-05, "loss": 0.6576, "step": 5049 }, { "epoch": 0.9750917165475961, "grad_norm": 1.6265891790390015, "learning_rate": 6.081603997804044e-05, "loss": 0.6287, "step": 5050 }, { "epoch": 0.9752848040162193, "grad_norm": 1.4738366603851318, "learning_rate": 6.079958792082599e-05, "loss": 0.5938, "step": 5051 }, { "epoch": 0.9754778914848427, "grad_norm": 0.7761298418045044, "learning_rate": 6.078313463705173e-05, "loss": 0.6143, "step": 5052 }, { "epoch": 0.9756709789534659, "grad_norm": 1.3283363580703735, "learning_rate": 6.076668012858635e-05, "loss": 0.5774, "step": 5053 }, { "epoch": 0.9758640664220892, "grad_norm": 1.0346455574035645, "learning_rate": 6.075022439729865e-05, "loss": 0.6622, "step": 5054 }, { "epoch": 0.9760571538907125, "grad_norm": 1.5760937929153442, "learning_rate": 6.07337674450576e-05, "loss": 0.6432, "step": 5055 }, { "epoch": 0.9762502413593358, "grad_norm": 1.5585546493530273, "learning_rate": 6.0717309273732283e-05, "loss": 0.6168, "step": 5056 }, { "epoch": 0.976443328827959, "grad_norm": 4.87908411026001, "learning_rate": 6.0700849885191925e-05, "loss": 0.6601, "step": 5057 }, { "epoch": 0.9766364162965824, "grad_norm": 1.7410048246383667, "learning_rate": 6.068438928130591e-05, "loss": 0.6283, "step": 5058 }, { "epoch": 0.9768295037652056, "grad_norm": 0.6156486868858337, "learning_rate": 6.066792746394373e-05, "loss": 0.6121, "step": 5059 }, { "epoch": 0.977022591233829, "grad_norm": 2.14704966545105, "learning_rate": 6.065146443497507e-05, "loss": 0.6014, "step": 5060 }, { "epoch": 0.9772156787024522, "grad_norm": 0.7753509879112244, "learning_rate": 6.0635000196269655e-05, "loss": 0.622, "step": 5061 }, { "epoch": 0.9774087661710755, "grad_norm": 1.003991723060608, "learning_rate": 6.061853474969743e-05, "loss": 0.5914, "step": 5062 }, { "epoch": 0.9776018536396988, "grad_norm": 1.762815237045288, "learning_rate": 6.0602068097128475e-05, "loss": 0.6349, "step": 5063 }, { "epoch": 0.9777949411083221, "grad_norm": 0.8049954175949097, "learning_rate": 6.058560024043296e-05, "loss": 0.6304, "step": 5064 }, { "epoch": 0.9779880285769453, "grad_norm": 0.6902963519096375, "learning_rate": 6.056913118148122e-05, "loss": 0.6533, "step": 5065 }, { "epoch": 0.9781811160455687, "grad_norm": 0.9104796051979065, "learning_rate": 6.0552660922143734e-05, "loss": 0.6509, "step": 5066 }, { "epoch": 0.9783742035141919, "grad_norm": 0.6218077540397644, "learning_rate": 6.0536189464291096e-05, "loss": 0.5353, "step": 5067 }, { "epoch": 0.9785672909828153, "grad_norm": 0.5870218873023987, "learning_rate": 6.0519716809794045e-05, "loss": 0.5861, "step": 5068 }, { "epoch": 0.9787603784514385, "grad_norm": 0.5141420364379883, "learning_rate": 6.0503242960523465e-05, "loss": 0.6479, "step": 5069 }, { "epoch": 0.9789534659200618, "grad_norm": 1.4960119724273682, "learning_rate": 6.048676791835037e-05, "loss": 0.6418, "step": 5070 }, { "epoch": 0.9791465533886851, "grad_norm": 1.0557218790054321, "learning_rate": 6.0470291685145894e-05, "loss": 0.6439, "step": 5071 }, { "epoch": 0.9793396408573084, "grad_norm": 3.232699155807495, "learning_rate": 6.045381426278134e-05, "loss": 0.6775, "step": 5072 }, { "epoch": 0.9795327283259316, "grad_norm": 2.2946882247924805, "learning_rate": 6.04373356531281e-05, "loss": 0.5443, "step": 5073 }, { "epoch": 0.979725815794555, "grad_norm": 0.8859127759933472, "learning_rate": 6.0420855858057765e-05, "loss": 0.6682, "step": 5074 }, { "epoch": 0.9799189032631782, "grad_norm": 0.8153247237205505, "learning_rate": 6.0404374879441996e-05, "loss": 0.5498, "step": 5075 }, { "epoch": 0.9801119907318016, "grad_norm": 0.9998711943626404, "learning_rate": 6.0387892719152605e-05, "loss": 0.5927, "step": 5076 }, { "epoch": 0.9803050782004248, "grad_norm": 0.6098742485046387, "learning_rate": 6.037140937906157e-05, "loss": 0.6348, "step": 5077 }, { "epoch": 0.9804981656690481, "grad_norm": 1.5954437255859375, "learning_rate": 6.0354924861040974e-05, "loss": 0.6053, "step": 5078 }, { "epoch": 0.9806912531376714, "grad_norm": 1.5804105997085571, "learning_rate": 6.033843916696303e-05, "loss": 0.7194, "step": 5079 }, { "epoch": 0.9808843406062947, "grad_norm": 0.7647485733032227, "learning_rate": 6.032195229870012e-05, "loss": 0.6447, "step": 5080 }, { "epoch": 0.9810774280749179, "grad_norm": 5.411445140838623, "learning_rate": 6.030546425812472e-05, "loss": 0.5711, "step": 5081 }, { "epoch": 0.9812705155435413, "grad_norm": 0.7779829502105713, "learning_rate": 6.028897504710944e-05, "loss": 0.6125, "step": 5082 }, { "epoch": 0.9814636030121645, "grad_norm": 0.5981501340866089, "learning_rate": 6.0272484667527074e-05, "loss": 0.6182, "step": 5083 }, { "epoch": 0.9816566904807877, "grad_norm": 0.8884282112121582, "learning_rate": 6.025599312125048e-05, "loss": 0.6274, "step": 5084 }, { "epoch": 0.9818497779494111, "grad_norm": 0.6399535536766052, "learning_rate": 6.0239500410152684e-05, "loss": 0.6851, "step": 5085 }, { "epoch": 0.9820428654180343, "grad_norm": 1.0014777183532715, "learning_rate": 6.022300653610684e-05, "loss": 0.6173, "step": 5086 }, { "epoch": 0.9822359528866577, "grad_norm": 0.747489869594574, "learning_rate": 6.020651150098625e-05, "loss": 0.6065, "step": 5087 }, { "epoch": 0.9824290403552809, "grad_norm": 1.1118117570877075, "learning_rate": 6.019001530666433e-05, "loss": 0.6539, "step": 5088 }, { "epoch": 0.9826221278239042, "grad_norm": 1.8408104181289673, "learning_rate": 6.0173517955014614e-05, "loss": 0.6346, "step": 5089 }, { "epoch": 0.9828152152925275, "grad_norm": 0.7367414832115173, "learning_rate": 6.015701944791079e-05, "loss": 0.6234, "step": 5090 }, { "epoch": 0.9830083027611508, "grad_norm": 0.6882725954055786, "learning_rate": 6.014051978722668e-05, "loss": 0.5838, "step": 5091 }, { "epoch": 0.983201390229774, "grad_norm": 1.2354029417037964, "learning_rate": 6.012401897483622e-05, "loss": 0.6307, "step": 5092 }, { "epoch": 0.9833944776983974, "grad_norm": 0.7530829906463623, "learning_rate": 6.010751701261348e-05, "loss": 0.611, "step": 5093 }, { "epoch": 0.9835875651670206, "grad_norm": 1.5049093961715698, "learning_rate": 6.009101390243267e-05, "loss": 0.6659, "step": 5094 }, { "epoch": 0.983780652635644, "grad_norm": 0.5914728045463562, "learning_rate": 6.0074509646168144e-05, "loss": 0.6021, "step": 5095 }, { "epoch": 0.9839737401042672, "grad_norm": 0.6052276492118835, "learning_rate": 6.005800424569433e-05, "loss": 0.6293, "step": 5096 }, { "epoch": 0.9841668275728905, "grad_norm": 0.9801470637321472, "learning_rate": 6.004149770288584e-05, "loss": 0.6281, "step": 5097 }, { "epoch": 0.9843599150415138, "grad_norm": 1.2286144495010376, "learning_rate": 6.002499001961742e-05, "loss": 0.6389, "step": 5098 }, { "epoch": 0.9845530025101371, "grad_norm": 1.069869041442871, "learning_rate": 6.000848119776391e-05, "loss": 0.6165, "step": 5099 }, { "epoch": 0.9847460899787603, "grad_norm": 0.8201898336410522, "learning_rate": 5.9991971239200286e-05, "loss": 0.6247, "step": 5100 }, { "epoch": 0.9849391774473837, "grad_norm": 0.984964907169342, "learning_rate": 5.9975460145801664e-05, "loss": 0.6618, "step": 5101 }, { "epoch": 0.9851322649160069, "grad_norm": 2.861299753189087, "learning_rate": 5.9958947919443295e-05, "loss": 0.619, "step": 5102 }, { "epoch": 0.9853253523846303, "grad_norm": 5.605594158172607, "learning_rate": 5.994243456200055e-05, "loss": 0.6466, "step": 5103 }, { "epoch": 0.9855184398532535, "grad_norm": 3.825437307357788, "learning_rate": 5.992592007534891e-05, "loss": 0.6423, "step": 5104 }, { "epoch": 0.9857115273218768, "grad_norm": 0.9091773629188538, "learning_rate": 5.990940446136403e-05, "loss": 0.6633, "step": 5105 }, { "epoch": 0.9859046147905001, "grad_norm": 2.5278563499450684, "learning_rate": 5.989288772192164e-05, "loss": 0.6565, "step": 5106 }, { "epoch": 0.9860977022591234, "grad_norm": 0.6463882923126221, "learning_rate": 5.987636985889764e-05, "loss": 0.6664, "step": 5107 }, { "epoch": 0.9862907897277466, "grad_norm": 0.5385766625404358, "learning_rate": 5.985985087416803e-05, "loss": 0.5289, "step": 5108 }, { "epoch": 0.98648387719637, "grad_norm": 0.9059380888938904, "learning_rate": 5.984333076960897e-05, "loss": 0.6296, "step": 5109 }, { "epoch": 0.9866769646649932, "grad_norm": 0.9683897495269775, "learning_rate": 5.9826809547096696e-05, "loss": 0.5963, "step": 5110 }, { "epoch": 0.9868700521336166, "grad_norm": 0.6538642644882202, "learning_rate": 5.981028720850762e-05, "loss": 0.6529, "step": 5111 }, { "epoch": 0.9870631396022398, "grad_norm": 0.5329998731613159, "learning_rate": 5.979376375571826e-05, "loss": 0.6456, "step": 5112 }, { "epoch": 0.9872562270708631, "grad_norm": 2.5810387134552, "learning_rate": 5.9777239190605264e-05, "loss": 0.6245, "step": 5113 }, { "epoch": 0.9874493145394864, "grad_norm": 1.2676281929016113, "learning_rate": 5.976071351504538e-05, "loss": 0.7095, "step": 5114 }, { "epoch": 0.9876424020081097, "grad_norm": 0.9531176090240479, "learning_rate": 5.974418673091554e-05, "loss": 0.5692, "step": 5115 }, { "epoch": 0.9878354894767329, "grad_norm": 1.5735468864440918, "learning_rate": 5.972765884009275e-05, "loss": 0.5853, "step": 5116 }, { "epoch": 0.9880285769453563, "grad_norm": 1.129485011100769, "learning_rate": 5.971112984445415e-05, "loss": 0.6369, "step": 5117 }, { "epoch": 0.9882216644139795, "grad_norm": 0.681807816028595, "learning_rate": 5.9694599745877054e-05, "loss": 0.6483, "step": 5118 }, { "epoch": 0.9884147518826029, "grad_norm": 1.0994399785995483, "learning_rate": 5.967806854623882e-05, "loss": 0.6578, "step": 5119 }, { "epoch": 0.9886078393512261, "grad_norm": 1.1129257678985596, "learning_rate": 5.966153624741699e-05, "loss": 0.6038, "step": 5120 }, { "epoch": 0.9888009268198494, "grad_norm": 1.1630957126617432, "learning_rate": 5.9645002851289235e-05, "loss": 0.6394, "step": 5121 }, { "epoch": 0.9889940142884727, "grad_norm": 0.6411422491073608, "learning_rate": 5.962846835973329e-05, "loss": 0.5648, "step": 5122 }, { "epoch": 0.989187101757096, "grad_norm": 1.2874733209609985, "learning_rate": 5.9611932774627085e-05, "loss": 0.5845, "step": 5123 }, { "epoch": 0.9893801892257192, "grad_norm": 0.9296566843986511, "learning_rate": 5.959539609784864e-05, "loss": 0.6754, "step": 5124 }, { "epoch": 0.9895732766943426, "grad_norm": 0.7662822604179382, "learning_rate": 5.957885833127609e-05, "loss": 0.6234, "step": 5125 }, { "epoch": 0.9897663641629658, "grad_norm": 0.8226108551025391, "learning_rate": 5.956231947678772e-05, "loss": 0.5808, "step": 5126 }, { "epoch": 0.9899594516315892, "grad_norm": 0.6061047315597534, "learning_rate": 5.954577953626192e-05, "loss": 0.5819, "step": 5127 }, { "epoch": 0.9901525391002124, "grad_norm": 1.1136265993118286, "learning_rate": 5.9529238511577214e-05, "loss": 0.6012, "step": 5128 }, { "epoch": 0.9903456265688357, "grad_norm": 1.955655813217163, "learning_rate": 5.9512696404612236e-05, "loss": 0.566, "step": 5129 }, { "epoch": 0.990538714037459, "grad_norm": 0.6582748293876648, "learning_rate": 5.949615321724574e-05, "loss": 0.6101, "step": 5130 }, { "epoch": 0.9907318015060823, "grad_norm": 1.1531052589416504, "learning_rate": 5.947960895135662e-05, "loss": 0.6501, "step": 5131 }, { "epoch": 0.9909248889747055, "grad_norm": 0.9758384823799133, "learning_rate": 5.946306360882392e-05, "loss": 0.6412, "step": 5132 }, { "epoch": 0.9911179764433288, "grad_norm": 0.5397288799285889, "learning_rate": 5.944651719152672e-05, "loss": 0.6739, "step": 5133 }, { "epoch": 0.9913110639119521, "grad_norm": 0.7558644413948059, "learning_rate": 5.942996970134431e-05, "loss": 0.6268, "step": 5134 }, { "epoch": 0.9915041513805753, "grad_norm": 1.0195422172546387, "learning_rate": 5.9413421140156055e-05, "loss": 0.6481, "step": 5135 }, { "epoch": 0.9916972388491987, "grad_norm": 0.894505500793457, "learning_rate": 5.939687150984143e-05, "loss": 0.5952, "step": 5136 }, { "epoch": 0.9918903263178219, "grad_norm": 3.0154807567596436, "learning_rate": 5.938032081228011e-05, "loss": 0.631, "step": 5137 }, { "epoch": 0.9920834137864453, "grad_norm": 1.3561887741088867, "learning_rate": 5.936376904935178e-05, "loss": 0.6326, "step": 5138 }, { "epoch": 0.9922765012550685, "grad_norm": 1.0252318382263184, "learning_rate": 5.934721622293632e-05, "loss": 0.5984, "step": 5139 }, { "epoch": 0.9924695887236918, "grad_norm": 0.848016619682312, "learning_rate": 5.933066233491371e-05, "loss": 0.5902, "step": 5140 }, { "epoch": 0.9926626761923151, "grad_norm": 0.8896539807319641, "learning_rate": 5.9314107387164084e-05, "loss": 0.5801, "step": 5141 }, { "epoch": 0.9928557636609384, "grad_norm": 1.1373941898345947, "learning_rate": 5.929755138156762e-05, "loss": 0.6378, "step": 5142 }, { "epoch": 0.9930488511295616, "grad_norm": 1.1579818725585938, "learning_rate": 5.928099432000467e-05, "loss": 0.6572, "step": 5143 }, { "epoch": 0.993241938598185, "grad_norm": 1.5565221309661865, "learning_rate": 5.9264436204355724e-05, "loss": 0.6444, "step": 5144 }, { "epoch": 0.9934350260668082, "grad_norm": 1.5104058980941772, "learning_rate": 5.924787703650134e-05, "loss": 0.642, "step": 5145 }, { "epoch": 0.9936281135354316, "grad_norm": 1.9635125398635864, "learning_rate": 5.9231316818322224e-05, "loss": 0.5687, "step": 5146 }, { "epoch": 0.9938212010040548, "grad_norm": 7.762699127197266, "learning_rate": 5.9214755551699194e-05, "loss": 0.667, "step": 5147 }, { "epoch": 0.9940142884726781, "grad_norm": 0.9651495814323425, "learning_rate": 5.919819323851321e-05, "loss": 0.6297, "step": 5148 }, { "epoch": 0.9942073759413014, "grad_norm": 0.6011384725570679, "learning_rate": 5.918162988064532e-05, "loss": 0.6002, "step": 5149 }, { "epoch": 0.9944004634099247, "grad_norm": 1.7829209566116333, "learning_rate": 5.9165065479976686e-05, "loss": 0.6499, "step": 5150 }, { "epoch": 0.994593550878548, "grad_norm": 0.9244329929351807, "learning_rate": 5.9148500038388636e-05, "loss": 0.6398, "step": 5151 }, { "epoch": 0.9947866383471713, "grad_norm": 0.6991683840751648, "learning_rate": 5.913193355776256e-05, "loss": 0.6277, "step": 5152 }, { "epoch": 0.9949797258157945, "grad_norm": 0.9479567408561707, "learning_rate": 5.911536603998e-05, "loss": 0.6572, "step": 5153 }, { "epoch": 0.9951728132844179, "grad_norm": 0.6603127717971802, "learning_rate": 5.90987974869226e-05, "loss": 0.6106, "step": 5154 }, { "epoch": 0.9953659007530411, "grad_norm": 0.9633632898330688, "learning_rate": 5.9082227900472156e-05, "loss": 0.6149, "step": 5155 }, { "epoch": 0.9955589882216644, "grad_norm": 0.6694616675376892, "learning_rate": 5.9065657282510514e-05, "loss": 0.6486, "step": 5156 }, { "epoch": 0.9957520756902877, "grad_norm": 1.1262246370315552, "learning_rate": 5.9049085634919684e-05, "loss": 0.6264, "step": 5157 }, { "epoch": 0.995945163158911, "grad_norm": 0.8009213209152222, "learning_rate": 5.903251295958182e-05, "loss": 0.6375, "step": 5158 }, { "epoch": 0.9961382506275342, "grad_norm": 1.2488843202590942, "learning_rate": 5.9015939258379125e-05, "loss": 0.6719, "step": 5159 }, { "epoch": 0.9963313380961576, "grad_norm": 1.7265784740447998, "learning_rate": 5.8999364533193957e-05, "loss": 0.6155, "step": 5160 }, { "epoch": 0.9965244255647808, "grad_norm": 0.896141767501831, "learning_rate": 5.8982788785908794e-05, "loss": 0.6607, "step": 5161 }, { "epoch": 0.9967175130334042, "grad_norm": 0.7643996477127075, "learning_rate": 5.896621201840622e-05, "loss": 0.5968, "step": 5162 }, { "epoch": 0.9969106005020274, "grad_norm": 0.7051860690116882, "learning_rate": 5.894963423256892e-05, "loss": 0.551, "step": 5163 }, { "epoch": 0.9971036879706507, "grad_norm": 1.0826735496520996, "learning_rate": 5.893305543027974e-05, "loss": 0.6175, "step": 5164 }, { "epoch": 0.997296775439274, "grad_norm": 1.0301706790924072, "learning_rate": 5.8916475613421575e-05, "loss": 0.6546, "step": 5165 }, { "epoch": 0.9974898629078973, "grad_norm": 1.0816569328308105, "learning_rate": 5.889989478387753e-05, "loss": 0.6307, "step": 5166 }, { "epoch": 0.9976829503765205, "grad_norm": 1.9089975357055664, "learning_rate": 5.88833129435307e-05, "loss": 0.7028, "step": 5167 }, { "epoch": 0.9978760378451439, "grad_norm": 0.9631651043891907, "learning_rate": 5.886673009426439e-05, "loss": 0.5779, "step": 5168 }, { "epoch": 0.9980691253137671, "grad_norm": 1.0312538146972656, "learning_rate": 5.8850146237962014e-05, "loss": 0.6384, "step": 5169 }, { "epoch": 0.9982622127823905, "grad_norm": 0.9108650088310242, "learning_rate": 5.8833561376507065e-05, "loss": 0.637, "step": 5170 }, { "epoch": 0.9984553002510137, "grad_norm": 1.3556182384490967, "learning_rate": 5.8816975511783135e-05, "loss": 0.6224, "step": 5171 }, { "epoch": 0.998648387719637, "grad_norm": 0.7891402244567871, "learning_rate": 5.8800388645673986e-05, "loss": 0.6385, "step": 5172 }, { "epoch": 0.9988414751882603, "grad_norm": 1.043941855430603, "learning_rate": 5.878380078006348e-05, "loss": 0.6396, "step": 5173 }, { "epoch": 0.9990345626568836, "grad_norm": 2.468651294708252, "learning_rate": 5.876721191683554e-05, "loss": 0.5715, "step": 5174 }, { "epoch": 0.9992276501255068, "grad_norm": 0.7470455765724182, "learning_rate": 5.875062205787427e-05, "loss": 0.6158, "step": 5175 }, { "epoch": 0.9994207375941302, "grad_norm": 0.9310211539268494, "learning_rate": 5.873403120506385e-05, "loss": 0.6818, "step": 5176 }, { "epoch": 0.9996138250627534, "grad_norm": 0.950301468372345, "learning_rate": 5.8717439360288576e-05, "loss": 0.6098, "step": 5177 }, { "epoch": 0.9998069125313768, "grad_norm": 1.6965357065200806, "learning_rate": 5.870084652543287e-05, "loss": 0.6604, "step": 5178 }, { "epoch": 1.0, "grad_norm": 0.5876534581184387, "learning_rate": 5.8684252702381247e-05, "loss": 0.6339, "step": 5179 }, { "epoch": 1.0001930874686233, "grad_norm": 1.026408076286316, "learning_rate": 5.8667657893018356e-05, "loss": 0.662, "step": 5180 }, { "epoch": 1.0003861749372465, "grad_norm": 1.0208415985107422, "learning_rate": 5.865106209922895e-05, "loss": 0.6625, "step": 5181 }, { "epoch": 1.0005792624058698, "grad_norm": 1.3542766571044922, "learning_rate": 5.863446532289787e-05, "loss": 0.5824, "step": 5182 }, { "epoch": 1.0007723498744932, "grad_norm": 0.9264121651649475, "learning_rate": 5.861786756591012e-05, "loss": 0.5839, "step": 5183 }, { "epoch": 1.0009654373431165, "grad_norm": 1.2074389457702637, "learning_rate": 5.860126883015076e-05, "loss": 0.6089, "step": 5184 }, { "epoch": 1.0011585248117396, "grad_norm": 2.520765542984009, "learning_rate": 5.858466911750499e-05, "loss": 0.66, "step": 5185 }, { "epoch": 1.001351612280363, "grad_norm": 1.065276861190796, "learning_rate": 5.856806842985813e-05, "loss": 0.6506, "step": 5186 }, { "epoch": 1.0015446997489863, "grad_norm": 1.0318937301635742, "learning_rate": 5.8551466769095586e-05, "loss": 0.5868, "step": 5187 }, { "epoch": 1.0017377872176096, "grad_norm": 0.9653267860412598, "learning_rate": 5.853486413710288e-05, "loss": 0.6, "step": 5188 }, { "epoch": 1.0019308746862328, "grad_norm": 1.2265006303787231, "learning_rate": 5.851826053576569e-05, "loss": 0.5546, "step": 5189 }, { "epoch": 1.002123962154856, "grad_norm": 0.9924628734588623, "learning_rate": 5.8501655966969715e-05, "loss": 0.6161, "step": 5190 }, { "epoch": 1.0023170496234795, "grad_norm": 2.3527305126190186, "learning_rate": 5.848505043260084e-05, "loss": 0.5703, "step": 5191 }, { "epoch": 1.0025101370921028, "grad_norm": 1.3935514688491821, "learning_rate": 5.8468443934545025e-05, "loss": 0.5307, "step": 5192 }, { "epoch": 1.002703224560726, "grad_norm": 0.8236677646636963, "learning_rate": 5.845183647468835e-05, "loss": 0.5768, "step": 5193 }, { "epoch": 1.0028963120293493, "grad_norm": 1.268323540687561, "learning_rate": 5.8435228054917004e-05, "loss": 0.5736, "step": 5194 }, { "epoch": 1.0030893994979726, "grad_norm": 1.4954078197479248, "learning_rate": 5.841861867711729e-05, "loss": 0.673, "step": 5195 }, { "epoch": 1.003282486966596, "grad_norm": 0.9534615278244019, "learning_rate": 5.8402008343175584e-05, "loss": 0.5794, "step": 5196 }, { "epoch": 1.003475574435219, "grad_norm": 0.7408707141876221, "learning_rate": 5.838539705497844e-05, "loss": 0.6073, "step": 5197 }, { "epoch": 1.0036686619038424, "grad_norm": 1.7416174411773682, "learning_rate": 5.836878481441246e-05, "loss": 0.6304, "step": 5198 }, { "epoch": 1.0038617493724658, "grad_norm": 0.8105040192604065, "learning_rate": 5.835217162336436e-05, "loss": 0.6503, "step": 5199 }, { "epoch": 1.004054836841089, "grad_norm": 0.7645242810249329, "learning_rate": 5.8335557483720994e-05, "loss": 0.6387, "step": 5200 }, { "epoch": 1.0042479243097122, "grad_norm": 5.935206413269043, "learning_rate": 5.831894239736932e-05, "loss": 0.5463, "step": 5201 }, { "epoch": 1.0044410117783356, "grad_norm": 1.1682642698287964, "learning_rate": 5.830232636619637e-05, "loss": 0.596, "step": 5202 }, { "epoch": 1.004634099246959, "grad_norm": 0.9954179525375366, "learning_rate": 5.82857093920893e-05, "loss": 0.6345, "step": 5203 }, { "epoch": 1.0048271867155822, "grad_norm": 0.9341858625411987, "learning_rate": 5.82690914769354e-05, "loss": 0.5913, "step": 5204 }, { "epoch": 1.0050202741842054, "grad_norm": 1.482818603515625, "learning_rate": 5.8252472622622046e-05, "loss": 0.6978, "step": 5205 }, { "epoch": 1.0052133616528287, "grad_norm": 0.9371077418327332, "learning_rate": 5.8235852831036694e-05, "loss": 0.6359, "step": 5206 }, { "epoch": 1.005406449121452, "grad_norm": 0.9900992512702942, "learning_rate": 5.8219232104066936e-05, "loss": 0.5973, "step": 5207 }, { "epoch": 1.0055995365900754, "grad_norm": 0.6342381834983826, "learning_rate": 5.8202610443600483e-05, "loss": 0.5612, "step": 5208 }, { "epoch": 1.0057926240586985, "grad_norm": 0.7378602027893066, "learning_rate": 5.8185987851525135e-05, "loss": 0.6508, "step": 5209 }, { "epoch": 1.0059857115273219, "grad_norm": 1.2126680612564087, "learning_rate": 5.816936432972877e-05, "loss": 0.6176, "step": 5210 }, { "epoch": 1.0061787989959452, "grad_norm": 0.8025833368301392, "learning_rate": 5.815273988009943e-05, "loss": 0.5945, "step": 5211 }, { "epoch": 1.0063718864645685, "grad_norm": 2.036750078201294, "learning_rate": 5.813611450452522e-05, "loss": 0.6884, "step": 5212 }, { "epoch": 1.0065649739331917, "grad_norm": 1.2008589506149292, "learning_rate": 5.811948820489435e-05, "loss": 0.5536, "step": 5213 }, { "epoch": 1.006758061401815, "grad_norm": 1.1238501071929932, "learning_rate": 5.8102860983095156e-05, "loss": 0.6382, "step": 5214 }, { "epoch": 1.0069511488704384, "grad_norm": 9.582169532775879, "learning_rate": 5.80862328410161e-05, "loss": 0.6775, "step": 5215 }, { "epoch": 1.0071442363390617, "grad_norm": 1.253978967666626, "learning_rate": 5.806960378054566e-05, "loss": 0.6291, "step": 5216 }, { "epoch": 1.0073373238076848, "grad_norm": 0.6822975873947144, "learning_rate": 5.805297380357251e-05, "loss": 0.5841, "step": 5217 }, { "epoch": 1.0075304112763082, "grad_norm": 1.076698899269104, "learning_rate": 5.80363429119854e-05, "loss": 0.5977, "step": 5218 }, { "epoch": 1.0077234987449315, "grad_norm": 1.0083245038986206, "learning_rate": 5.801971110767317e-05, "loss": 0.6707, "step": 5219 }, { "epoch": 1.0079165862135548, "grad_norm": 0.6624242663383484, "learning_rate": 5.800307839252476e-05, "loss": 0.6589, "step": 5220 }, { "epoch": 1.008109673682178, "grad_norm": 1.5969640016555786, "learning_rate": 5.798644476842925e-05, "loss": 0.6762, "step": 5221 }, { "epoch": 1.0083027611508013, "grad_norm": 0.7390683889389038, "learning_rate": 5.796981023727578e-05, "loss": 0.6425, "step": 5222 }, { "epoch": 1.0084958486194247, "grad_norm": 0.7897064685821533, "learning_rate": 5.7953174800953604e-05, "loss": 0.629, "step": 5223 }, { "epoch": 1.008688936088048, "grad_norm": 2.210813522338867, "learning_rate": 5.793653846135212e-05, "loss": 0.6328, "step": 5224 }, { "epoch": 1.0088820235566711, "grad_norm": 1.3042476177215576, "learning_rate": 5.791990122036075e-05, "loss": 0.5718, "step": 5225 }, { "epoch": 1.0090751110252945, "grad_norm": 0.7613709568977356, "learning_rate": 5.79032630798691e-05, "loss": 0.5743, "step": 5226 }, { "epoch": 1.0092681984939178, "grad_norm": 1.1021612882614136, "learning_rate": 5.788662404176684e-05, "loss": 0.5999, "step": 5227 }, { "epoch": 1.009461285962541, "grad_norm": 0.7331068515777588, "learning_rate": 5.7869984107943706e-05, "loss": 0.6561, "step": 5228 }, { "epoch": 1.0096543734311643, "grad_norm": 0.9121596217155457, "learning_rate": 5.785334328028962e-05, "loss": 0.6351, "step": 5229 }, { "epoch": 1.0098474608997876, "grad_norm": 0.8250612616539001, "learning_rate": 5.783670156069454e-05, "loss": 0.6917, "step": 5230 }, { "epoch": 1.010040548368411, "grad_norm": 0.79511559009552, "learning_rate": 5.7820058951048516e-05, "loss": 0.6199, "step": 5231 }, { "epoch": 1.010233635837034, "grad_norm": 1.081986904144287, "learning_rate": 5.7803415453241784e-05, "loss": 0.5889, "step": 5232 }, { "epoch": 1.0104267233056574, "grad_norm": 1.1499571800231934, "learning_rate": 5.778677106916458e-05, "loss": 0.6508, "step": 5233 }, { "epoch": 1.0106198107742808, "grad_norm": 0.6165075898170471, "learning_rate": 5.77701258007073e-05, "loss": 0.6501, "step": 5234 }, { "epoch": 1.010812898242904, "grad_norm": 1.702516794204712, "learning_rate": 5.775347964976042e-05, "loss": 0.5393, "step": 5235 }, { "epoch": 1.0110059857115272, "grad_norm": 0.6256076693534851, "learning_rate": 5.7736832618214545e-05, "loss": 0.5545, "step": 5236 }, { "epoch": 1.0111990731801506, "grad_norm": 1.445862889289856, "learning_rate": 5.77201847079603e-05, "loss": 0.6141, "step": 5237 }, { "epoch": 1.011392160648774, "grad_norm": 0.9481111168861389, "learning_rate": 5.770353592088853e-05, "loss": 0.6799, "step": 5238 }, { "epoch": 1.0115852481173973, "grad_norm": 1.1194249391555786, "learning_rate": 5.768688625889007e-05, "loss": 0.6068, "step": 5239 }, { "epoch": 1.0117783355860204, "grad_norm": 0.5780193209648132, "learning_rate": 5.767023572385594e-05, "loss": 0.6155, "step": 5240 }, { "epoch": 1.0119714230546437, "grad_norm": 0.9625962376594543, "learning_rate": 5.765358431767719e-05, "loss": 0.5762, "step": 5241 }, { "epoch": 1.012164510523267, "grad_norm": 1.2801426649093628, "learning_rate": 5.7636932042244994e-05, "loss": 0.6701, "step": 5242 }, { "epoch": 1.0123575979918904, "grad_norm": 0.7298070192337036, "learning_rate": 5.7620278899450654e-05, "loss": 0.5795, "step": 5243 }, { "epoch": 1.0125506854605135, "grad_norm": 1.363322138786316, "learning_rate": 5.7603624891185526e-05, "loss": 0.6025, "step": 5244 }, { "epoch": 1.0127437729291369, "grad_norm": 1.1086851358413696, "learning_rate": 5.758697001934108e-05, "loss": 0.6782, "step": 5245 }, { "epoch": 1.0129368603977602, "grad_norm": 0.8901178240776062, "learning_rate": 5.7570314285808905e-05, "loss": 0.702, "step": 5246 }, { "epoch": 1.0131299478663836, "grad_norm": 3.2677252292633057, "learning_rate": 5.755365769248068e-05, "loss": 0.597, "step": 5247 }, { "epoch": 1.0133230353350067, "grad_norm": 1.5513466596603394, "learning_rate": 5.753700024124814e-05, "loss": 0.5864, "step": 5248 }, { "epoch": 1.01351612280363, "grad_norm": 0.7636075019836426, "learning_rate": 5.752034193400318e-05, "loss": 0.5848, "step": 5249 }, { "epoch": 1.0137092102722534, "grad_norm": 3.0747666358947754, "learning_rate": 5.750368277263771e-05, "loss": 0.6037, "step": 5250 }, { "epoch": 1.0139022977408767, "grad_norm": 10.417352676391602, "learning_rate": 5.748702275904386e-05, "loss": 0.5919, "step": 5251 }, { "epoch": 1.0140953852094998, "grad_norm": 0.6887848973274231, "learning_rate": 5.7470361895113754e-05, "loss": 0.6185, "step": 5252 }, { "epoch": 1.0142884726781232, "grad_norm": 0.7340267896652222, "learning_rate": 5.745370018273962e-05, "loss": 0.5493, "step": 5253 }, { "epoch": 1.0144815601467465, "grad_norm": 1.8902487754821777, "learning_rate": 5.743703762381385e-05, "loss": 0.5988, "step": 5254 }, { "epoch": 1.0146746476153699, "grad_norm": 1.1457160711288452, "learning_rate": 5.742037422022887e-05, "loss": 0.6259, "step": 5255 }, { "epoch": 1.014867735083993, "grad_norm": 2.1413261890411377, "learning_rate": 5.740370997387721e-05, "loss": 0.6332, "step": 5256 }, { "epoch": 1.0150608225526163, "grad_norm": 1.7141849994659424, "learning_rate": 5.738704488665152e-05, "loss": 0.6003, "step": 5257 }, { "epoch": 1.0152539100212397, "grad_norm": 0.9827219843864441, "learning_rate": 5.737037896044453e-05, "loss": 0.6713, "step": 5258 }, { "epoch": 1.015446997489863, "grad_norm": 1.0784339904785156, "learning_rate": 5.7353712197149045e-05, "loss": 0.5999, "step": 5259 }, { "epoch": 1.0156400849584861, "grad_norm": 1.0982728004455566, "learning_rate": 5.7337044598658015e-05, "loss": 0.6403, "step": 5260 }, { "epoch": 1.0158331724271095, "grad_norm": 0.8498335480690002, "learning_rate": 5.7320376166864476e-05, "loss": 0.6618, "step": 5261 }, { "epoch": 1.0160262598957328, "grad_norm": 1.597487211227417, "learning_rate": 5.730370690366149e-05, "loss": 0.5736, "step": 5262 }, { "epoch": 1.0162193473643562, "grad_norm": 1.457605004310608, "learning_rate": 5.7287036810942284e-05, "loss": 0.6556, "step": 5263 }, { "epoch": 1.0164124348329793, "grad_norm": 1.5857654809951782, "learning_rate": 5.727036589060017e-05, "loss": 0.5428, "step": 5264 }, { "epoch": 1.0166055223016026, "grad_norm": 1.5760763883590698, "learning_rate": 5.725369414452855e-05, "loss": 0.6392, "step": 5265 }, { "epoch": 1.016798609770226, "grad_norm": 1.1986989974975586, "learning_rate": 5.723702157462086e-05, "loss": 0.5719, "step": 5266 }, { "epoch": 1.0169916972388493, "grad_norm": 1.8578741550445557, "learning_rate": 5.722034818277075e-05, "loss": 0.6244, "step": 5267 }, { "epoch": 1.0171847847074724, "grad_norm": 0.9038626551628113, "learning_rate": 5.720367397087185e-05, "loss": 0.6536, "step": 5268 }, { "epoch": 1.0173778721760958, "grad_norm": 1.0888290405273438, "learning_rate": 5.718699894081796e-05, "loss": 0.5752, "step": 5269 }, { "epoch": 1.017570959644719, "grad_norm": 0.7936128973960876, "learning_rate": 5.71703230945029e-05, "loss": 0.5909, "step": 5270 }, { "epoch": 1.0177640471133425, "grad_norm": 0.848372757434845, "learning_rate": 5.715364643382065e-05, "loss": 0.6265, "step": 5271 }, { "epoch": 1.0179571345819656, "grad_norm": 1.436484932899475, "learning_rate": 5.7136968960665274e-05, "loss": 0.5904, "step": 5272 }, { "epoch": 1.018150222050589, "grad_norm": 1.5946848392486572, "learning_rate": 5.7120290676930864e-05, "loss": 0.6123, "step": 5273 }, { "epoch": 1.0183433095192123, "grad_norm": 0.6623566150665283, "learning_rate": 5.7103611584511684e-05, "loss": 0.6116, "step": 5274 }, { "epoch": 1.0185363969878356, "grad_norm": 1.6315008401870728, "learning_rate": 5.708693168530206e-05, "loss": 0.6644, "step": 5275 }, { "epoch": 1.0187294844564587, "grad_norm": 0.6836603879928589, "learning_rate": 5.707025098119639e-05, "loss": 0.6763, "step": 5276 }, { "epoch": 1.018922571925082, "grad_norm": 0.8649719953536987, "learning_rate": 5.7053569474089186e-05, "loss": 0.6217, "step": 5277 }, { "epoch": 1.0191156593937054, "grad_norm": 2.1786460876464844, "learning_rate": 5.703688716587505e-05, "loss": 0.6296, "step": 5278 }, { "epoch": 1.0193087468623285, "grad_norm": 0.794819712638855, "learning_rate": 5.702020405844868e-05, "loss": 0.5477, "step": 5279 }, { "epoch": 1.0195018343309519, "grad_norm": 1.1338038444519043, "learning_rate": 5.7003520153704806e-05, "loss": 0.6319, "step": 5280 }, { "epoch": 1.0196949217995752, "grad_norm": 0.9261267781257629, "learning_rate": 5.6986835453538354e-05, "loss": 0.6509, "step": 5281 }, { "epoch": 1.0198880092681986, "grad_norm": 0.9882427453994751, "learning_rate": 5.697014995984426e-05, "loss": 0.5637, "step": 5282 }, { "epoch": 1.0200810967368217, "grad_norm": 0.8648658394813538, "learning_rate": 5.695346367451756e-05, "loss": 0.6551, "step": 5283 }, { "epoch": 1.020274184205445, "grad_norm": 1.2592823505401611, "learning_rate": 5.6936776599453424e-05, "loss": 0.6205, "step": 5284 }, { "epoch": 1.0204672716740684, "grad_norm": 1.0101208686828613, "learning_rate": 5.692008873654704e-05, "loss": 0.6005, "step": 5285 }, { "epoch": 1.0206603591426917, "grad_norm": 1.858128547668457, "learning_rate": 5.690340008769378e-05, "loss": 0.6691, "step": 5286 }, { "epoch": 1.0208534466113148, "grad_norm": 1.1263909339904785, "learning_rate": 5.688671065478901e-05, "loss": 0.6718, "step": 5287 }, { "epoch": 1.0210465340799382, "grad_norm": 1.3298600912094116, "learning_rate": 5.687002043972824e-05, "loss": 0.6277, "step": 5288 }, { "epoch": 1.0212396215485615, "grad_norm": 1.0358211994171143, "learning_rate": 5.685332944440706e-05, "loss": 0.5509, "step": 5289 }, { "epoch": 1.0214327090171849, "grad_norm": 2.0886852741241455, "learning_rate": 5.683663767072116e-05, "loss": 0.6539, "step": 5290 }, { "epoch": 1.021625796485808, "grad_norm": 2.8927154541015625, "learning_rate": 5.6819945120566254e-05, "loss": 0.6906, "step": 5291 }, { "epoch": 1.0218188839544313, "grad_norm": 1.2526707649230957, "learning_rate": 5.680325179583824e-05, "loss": 0.673, "step": 5292 }, { "epoch": 1.0220119714230547, "grad_norm": 1.9830394983291626, "learning_rate": 5.678655769843304e-05, "loss": 0.6238, "step": 5293 }, { "epoch": 1.022205058891678, "grad_norm": 2.0572915077209473, "learning_rate": 5.676986283024668e-05, "loss": 0.6177, "step": 5294 }, { "epoch": 1.0223981463603011, "grad_norm": 1.1183940172195435, "learning_rate": 5.675316719317527e-05, "loss": 0.7276, "step": 5295 }, { "epoch": 1.0225912338289245, "grad_norm": 2.658381938934326, "learning_rate": 5.6736470789115036e-05, "loss": 0.5937, "step": 5296 }, { "epoch": 1.0227843212975478, "grad_norm": 0.8312837481498718, "learning_rate": 5.671977361996222e-05, "loss": 0.5895, "step": 5297 }, { "epoch": 1.0229774087661712, "grad_norm": 0.891402542591095, "learning_rate": 5.670307568761325e-05, "loss": 0.6263, "step": 5298 }, { "epoch": 1.0231704962347943, "grad_norm": 1.2582659721374512, "learning_rate": 5.668637699396455e-05, "loss": 0.7049, "step": 5299 }, { "epoch": 1.0233635837034176, "grad_norm": 0.915619432926178, "learning_rate": 5.666967754091269e-05, "loss": 0.6026, "step": 5300 }, { "epoch": 1.023556671172041, "grad_norm": 1.31666898727417, "learning_rate": 5.6652977330354306e-05, "loss": 0.6172, "step": 5301 }, { "epoch": 1.0237497586406643, "grad_norm": 5.500572681427002, "learning_rate": 5.6636276364186105e-05, "loss": 0.5782, "step": 5302 }, { "epoch": 1.0239428461092874, "grad_norm": 0.8085219264030457, "learning_rate": 5.661957464430489e-05, "loss": 0.594, "step": 5303 }, { "epoch": 1.0241359335779108, "grad_norm": 1.3055709600448608, "learning_rate": 5.660287217260759e-05, "loss": 0.6005, "step": 5304 }, { "epoch": 1.0243290210465341, "grad_norm": 1.0147055387496948, "learning_rate": 5.6586168950991133e-05, "loss": 0.5646, "step": 5305 }, { "epoch": 1.0245221085151575, "grad_norm": 1.0603917837142944, "learning_rate": 5.6569464981352616e-05, "loss": 0.5985, "step": 5306 }, { "epoch": 1.0247151959837806, "grad_norm": 13.122359275817871, "learning_rate": 5.655276026558919e-05, "loss": 0.5988, "step": 5307 }, { "epoch": 1.024908283452404, "grad_norm": 2.1133456230163574, "learning_rate": 5.653605480559806e-05, "loss": 0.6172, "step": 5308 }, { "epoch": 1.0251013709210273, "grad_norm": 0.8157146573066711, "learning_rate": 5.6519348603276554e-05, "loss": 0.6042, "step": 5309 }, { "epoch": 1.0252944583896506, "grad_norm": 1.8245657682418823, "learning_rate": 5.6502641660522095e-05, "loss": 0.6598, "step": 5310 }, { "epoch": 1.0254875458582737, "grad_norm": 5.373621940612793, "learning_rate": 5.6485933979232155e-05, "loss": 0.6201, "step": 5311 }, { "epoch": 1.025680633326897, "grad_norm": 0.9113233089447021, "learning_rate": 5.64692255613043e-05, "loss": 0.6239, "step": 5312 }, { "epoch": 1.0258737207955204, "grad_norm": 1.9153671264648438, "learning_rate": 5.645251640863616e-05, "loss": 0.6275, "step": 5313 }, { "epoch": 1.0260668082641438, "grad_norm": 1.3060698509216309, "learning_rate": 5.643580652312551e-05, "loss": 0.5767, "step": 5314 }, { "epoch": 1.0262598957327669, "grad_norm": 0.9900599718093872, "learning_rate": 5.6419095906670175e-05, "loss": 0.6756, "step": 5315 }, { "epoch": 1.0264529832013902, "grad_norm": 1.509132742881775, "learning_rate": 5.640238456116801e-05, "loss": 0.6535, "step": 5316 }, { "epoch": 1.0266460706700136, "grad_norm": 1.2305786609649658, "learning_rate": 5.638567248851705e-05, "loss": 0.6401, "step": 5317 }, { "epoch": 1.026839158138637, "grad_norm": 1.1049718856811523, "learning_rate": 5.636895969061533e-05, "loss": 0.6129, "step": 5318 }, { "epoch": 1.02703224560726, "grad_norm": 0.9414023756980896, "learning_rate": 5.6352246169361024e-05, "loss": 0.5665, "step": 5319 }, { "epoch": 1.0272253330758834, "grad_norm": 1.600422739982605, "learning_rate": 5.633553192665234e-05, "loss": 0.6391, "step": 5320 }, { "epoch": 1.0274184205445067, "grad_norm": 2.438227891921997, "learning_rate": 5.631881696438763e-05, "loss": 0.5368, "step": 5321 }, { "epoch": 1.02761150801313, "grad_norm": 1.100996971130371, "learning_rate": 5.630210128446526e-05, "loss": 0.6989, "step": 5322 }, { "epoch": 1.0278045954817532, "grad_norm": 1.0252904891967773, "learning_rate": 5.62853848887837e-05, "loss": 0.6082, "step": 5323 }, { "epoch": 1.0279976829503765, "grad_norm": 0.877652108669281, "learning_rate": 5.626866777924155e-05, "loss": 0.5703, "step": 5324 }, { "epoch": 1.0281907704189999, "grad_norm": 1.1477365493774414, "learning_rate": 5.625194995773741e-05, "loss": 0.7123, "step": 5325 }, { "epoch": 1.028383857887623, "grad_norm": 2.5042569637298584, "learning_rate": 5.623523142617002e-05, "loss": 0.6901, "step": 5326 }, { "epoch": 1.0285769453562463, "grad_norm": 1.6917434930801392, "learning_rate": 5.6218512186438196e-05, "loss": 0.6309, "step": 5327 }, { "epoch": 1.0287700328248697, "grad_norm": 1.3750215768814087, "learning_rate": 5.62017922404408e-05, "loss": 0.7124, "step": 5328 }, { "epoch": 1.028963120293493, "grad_norm": 1.228283405303955, "learning_rate": 5.6185071590076774e-05, "loss": 0.5741, "step": 5329 }, { "epoch": 1.0291562077621161, "grad_norm": 1.7263522148132324, "learning_rate": 5.616835023724521e-05, "loss": 0.539, "step": 5330 }, { "epoch": 1.0293492952307395, "grad_norm": 0.9296664595603943, "learning_rate": 5.61516281838452e-05, "loss": 0.6191, "step": 5331 }, { "epoch": 1.0295423826993628, "grad_norm": 0.8027777075767517, "learning_rate": 5.6134905431775964e-05, "loss": 0.6112, "step": 5332 }, { "epoch": 1.0297354701679862, "grad_norm": 1.9762334823608398, "learning_rate": 5.611818198293674e-05, "loss": 0.6538, "step": 5333 }, { "epoch": 1.0299285576366093, "grad_norm": 1.3956067562103271, "learning_rate": 5.6101457839226936e-05, "loss": 0.5739, "step": 5334 }, { "epoch": 1.0301216451052326, "grad_norm": 1.3816412687301636, "learning_rate": 5.608473300254598e-05, "loss": 0.5774, "step": 5335 }, { "epoch": 1.030314732573856, "grad_norm": 2.6878509521484375, "learning_rate": 5.606800747479338e-05, "loss": 0.594, "step": 5336 }, { "epoch": 1.0305078200424793, "grad_norm": 1.4144024848937988, "learning_rate": 5.6051281257868736e-05, "loss": 0.6032, "step": 5337 }, { "epoch": 1.0307009075111024, "grad_norm": 2.0559298992156982, "learning_rate": 5.603455435367173e-05, "loss": 0.604, "step": 5338 }, { "epoch": 1.0308939949797258, "grad_norm": 1.4085248708724976, "learning_rate": 5.601782676410211e-05, "loss": 0.6603, "step": 5339 }, { "epoch": 1.0310870824483491, "grad_norm": 0.9876217246055603, "learning_rate": 5.6001098491059704e-05, "loss": 0.6311, "step": 5340 }, { "epoch": 1.0312801699169725, "grad_norm": 1.4891785383224487, "learning_rate": 5.598436953644441e-05, "loss": 0.6394, "step": 5341 }, { "epoch": 1.0314732573855956, "grad_norm": 0.7938770651817322, "learning_rate": 5.5967639902156255e-05, "loss": 0.6422, "step": 5342 }, { "epoch": 1.031666344854219, "grad_norm": 0.7966576218605042, "learning_rate": 5.5950909590095245e-05, "loss": 0.6672, "step": 5343 }, { "epoch": 1.0318594323228423, "grad_norm": 0.7592454552650452, "learning_rate": 5.593417860216156e-05, "loss": 0.5976, "step": 5344 }, { "epoch": 1.0320525197914656, "grad_norm": 1.010696530342102, "learning_rate": 5.5917446940255395e-05, "loss": 0.563, "step": 5345 }, { "epoch": 1.0322456072600887, "grad_norm": 0.9989866018295288, "learning_rate": 5.590071460627706e-05, "loss": 0.5788, "step": 5346 }, { "epoch": 1.032438694728712, "grad_norm": 0.7089399695396423, "learning_rate": 5.588398160212692e-05, "loss": 0.6361, "step": 5347 }, { "epoch": 1.0326317821973354, "grad_norm": 1.2663371562957764, "learning_rate": 5.586724792970541e-05, "loss": 0.6326, "step": 5348 }, { "epoch": 1.0328248696659588, "grad_norm": 0.8620501160621643, "learning_rate": 5.585051359091308e-05, "loss": 0.5576, "step": 5349 }, { "epoch": 1.0330179571345819, "grad_norm": 1.458023190498352, "learning_rate": 5.583377858765049e-05, "loss": 0.6462, "step": 5350 }, { "epoch": 1.0332110446032052, "grad_norm": 1.1125484704971313, "learning_rate": 5.581704292181833e-05, "loss": 0.5946, "step": 5351 }, { "epoch": 1.0334041320718286, "grad_norm": 3.3118252754211426, "learning_rate": 5.580030659531734e-05, "loss": 0.5403, "step": 5352 }, { "epoch": 1.033597219540452, "grad_norm": 89.99138641357422, "learning_rate": 5.578356961004838e-05, "loss": 0.6412, "step": 5353 }, { "epoch": 1.033790307009075, "grad_norm": 0.9376798868179321, "learning_rate": 5.5766831967912295e-05, "loss": 0.5843, "step": 5354 }, { "epoch": 1.0339833944776984, "grad_norm": 1.506834864616394, "learning_rate": 5.575009367081009e-05, "loss": 0.6353, "step": 5355 }, { "epoch": 1.0341764819463217, "grad_norm": 0.9974458813667297, "learning_rate": 5.573335472064278e-05, "loss": 0.6425, "step": 5356 }, { "epoch": 1.034369569414945, "grad_norm": 2.5148215293884277, "learning_rate": 5.5716615119311545e-05, "loss": 0.6289, "step": 5357 }, { "epoch": 1.0345626568835682, "grad_norm": 1.0012913942337036, "learning_rate": 5.569987486871753e-05, "loss": 0.6421, "step": 5358 }, { "epoch": 1.0347557443521915, "grad_norm": 1.0713335275650024, "learning_rate": 5.5683133970762003e-05, "loss": 0.6158, "step": 5359 }, { "epoch": 1.0349488318208149, "grad_norm": 1.2489397525787354, "learning_rate": 5.5666392427346334e-05, "loss": 0.6193, "step": 5360 }, { "epoch": 1.0351419192894382, "grad_norm": 1.4789958000183105, "learning_rate": 5.564965024037193e-05, "loss": 0.5564, "step": 5361 }, { "epoch": 1.0353350067580613, "grad_norm": 1.1584113836288452, "learning_rate": 5.563290741174026e-05, "loss": 0.626, "step": 5362 }, { "epoch": 1.0355280942266847, "grad_norm": 1.5225422382354736, "learning_rate": 5.5616163943352916e-05, "loss": 0.6133, "step": 5363 }, { "epoch": 1.035721181695308, "grad_norm": 1.5870583057403564, "learning_rate": 5.559941983711151e-05, "loss": 0.6273, "step": 5364 }, { "epoch": 1.0359142691639314, "grad_norm": 1.785798192024231, "learning_rate": 5.5582675094917744e-05, "loss": 0.6585, "step": 5365 }, { "epoch": 1.0361073566325545, "grad_norm": 0.7275159358978271, "learning_rate": 5.5565929718673414e-05, "loss": 0.5245, "step": 5366 }, { "epoch": 1.0363004441011778, "grad_norm": 0.9441995024681091, "learning_rate": 5.554918371028039e-05, "loss": 0.5905, "step": 5367 }, { "epoch": 1.0364935315698012, "grad_norm": 1.0222182273864746, "learning_rate": 5.553243707164054e-05, "loss": 0.6289, "step": 5368 }, { "epoch": 1.0366866190384245, "grad_norm": 1.0954532623291016, "learning_rate": 5.551568980465589e-05, "loss": 0.6403, "step": 5369 }, { "epoch": 1.0368797065070476, "grad_norm": 0.7005116939544678, "learning_rate": 5.549894191122852e-05, "loss": 0.6883, "step": 5370 }, { "epoch": 1.037072793975671, "grad_norm": 1.2250480651855469, "learning_rate": 5.548219339326054e-05, "loss": 0.6302, "step": 5371 }, { "epoch": 1.0372658814442943, "grad_norm": 1.9074928760528564, "learning_rate": 5.5465444252654166e-05, "loss": 0.6909, "step": 5372 }, { "epoch": 1.0374589689129174, "grad_norm": 1.111388087272644, "learning_rate": 5.544869449131168e-05, "loss": 0.6077, "step": 5373 }, { "epoch": 1.0376520563815408, "grad_norm": 6.1238017082214355, "learning_rate": 5.543194411113544e-05, "loss": 0.7015, "step": 5374 }, { "epoch": 1.0378451438501641, "grad_norm": 0.8558908700942993, "learning_rate": 5.541519311402784e-05, "loss": 0.5789, "step": 5375 }, { "epoch": 1.0380382313187875, "grad_norm": 0.6566475033760071, "learning_rate": 5.539844150189138e-05, "loss": 0.6639, "step": 5376 }, { "epoch": 1.0382313187874106, "grad_norm": 1.3184499740600586, "learning_rate": 5.538168927662862e-05, "loss": 0.5831, "step": 5377 }, { "epoch": 1.038424406256034, "grad_norm": 0.9487969875335693, "learning_rate": 5.536493644014222e-05, "loss": 0.6298, "step": 5378 }, { "epoch": 1.0386174937246573, "grad_norm": 2.8432652950286865, "learning_rate": 5.534818299433483e-05, "loss": 0.5844, "step": 5379 }, { "epoch": 1.0388105811932806, "grad_norm": 1.1977763175964355, "learning_rate": 5.533142894110923e-05, "loss": 0.6829, "step": 5380 }, { "epoch": 1.0390036686619037, "grad_norm": 0.9365636110305786, "learning_rate": 5.5314674282368275e-05, "loss": 0.5964, "step": 5381 }, { "epoch": 1.039196756130527, "grad_norm": 1.4175593852996826, "learning_rate": 5.5297919020014856e-05, "loss": 0.579, "step": 5382 }, { "epoch": 1.0393898435991504, "grad_norm": 1.4588203430175781, "learning_rate": 5.5281163155951946e-05, "loss": 0.619, "step": 5383 }, { "epoch": 1.0395829310677738, "grad_norm": 1.3851819038391113, "learning_rate": 5.526440669208259e-05, "loss": 0.6686, "step": 5384 }, { "epoch": 1.039776018536397, "grad_norm": 1.1329931020736694, "learning_rate": 5.524764963030991e-05, "loss": 0.6304, "step": 5385 }, { "epoch": 1.0399691060050202, "grad_norm": 0.8132599592208862, "learning_rate": 5.523089197253705e-05, "loss": 0.5869, "step": 5386 }, { "epoch": 1.0401621934736436, "grad_norm": 1.0706977844238281, "learning_rate": 5.521413372066728e-05, "loss": 0.6844, "step": 5387 }, { "epoch": 1.040355280942267, "grad_norm": 1.2672783136367798, "learning_rate": 5.519737487660392e-05, "loss": 0.634, "step": 5388 }, { "epoch": 1.04054836841089, "grad_norm": 1.106873869895935, "learning_rate": 5.518061544225034e-05, "loss": 0.638, "step": 5389 }, { "epoch": 1.0407414558795134, "grad_norm": 0.8463345766067505, "learning_rate": 5.516385541950999e-05, "loss": 0.7092, "step": 5390 }, { "epoch": 1.0409345433481367, "grad_norm": 1.8448846340179443, "learning_rate": 5.5147094810286384e-05, "loss": 0.5812, "step": 5391 }, { "epoch": 1.04112763081676, "grad_norm": 1.2394520044326782, "learning_rate": 5.51303336164831e-05, "loss": 0.6053, "step": 5392 }, { "epoch": 1.0413207182853832, "grad_norm": 0.8278684616088867, "learning_rate": 5.5113571840003795e-05, "loss": 0.633, "step": 5393 }, { "epoch": 1.0415138057540065, "grad_norm": 0.8085249662399292, "learning_rate": 5.509680948275215e-05, "loss": 0.5796, "step": 5394 }, { "epoch": 1.0417068932226299, "grad_norm": 2.155867099761963, "learning_rate": 5.5080046546632e-05, "loss": 0.6365, "step": 5395 }, { "epoch": 1.0418999806912532, "grad_norm": 1.2779806852340698, "learning_rate": 5.506328303354715e-05, "loss": 0.6426, "step": 5396 }, { "epoch": 1.0420930681598763, "grad_norm": 1.2869497537612915, "learning_rate": 5.5046518945401504e-05, "loss": 0.6454, "step": 5397 }, { "epoch": 1.0422861556284997, "grad_norm": 0.9993500709533691, "learning_rate": 5.502975428409908e-05, "loss": 0.5288, "step": 5398 }, { "epoch": 1.042479243097123, "grad_norm": 3.0327107906341553, "learning_rate": 5.5012989051543886e-05, "loss": 0.6533, "step": 5399 }, { "epoch": 1.0426723305657464, "grad_norm": 0.6871443390846252, "learning_rate": 5.499622324964002e-05, "loss": 0.6533, "step": 5400 }, { "epoch": 1.0428654180343695, "grad_norm": 0.8975365161895752, "learning_rate": 5.497945688029168e-05, "loss": 0.5904, "step": 5401 }, { "epoch": 1.0430585055029928, "grad_norm": 0.7395150065422058, "learning_rate": 5.496268994540309e-05, "loss": 0.5837, "step": 5402 }, { "epoch": 1.0432515929716162, "grad_norm": 6.822457313537598, "learning_rate": 5.494592244687854e-05, "loss": 0.615, "step": 5403 }, { "epoch": 1.0434446804402395, "grad_norm": 1.2839432954788208, "learning_rate": 5.492915438662242e-05, "loss": 0.5434, "step": 5404 }, { "epoch": 1.0436377679088626, "grad_norm": 1.2745815515518188, "learning_rate": 5.491238576653911e-05, "loss": 0.5618, "step": 5405 }, { "epoch": 1.043830855377486, "grad_norm": 0.8037213087081909, "learning_rate": 5.489561658853315e-05, "loss": 0.6507, "step": 5406 }, { "epoch": 1.0440239428461093, "grad_norm": 1.9134092330932617, "learning_rate": 5.487884685450908e-05, "loss": 0.6064, "step": 5407 }, { "epoch": 1.0442170303147327, "grad_norm": 0.8037579655647278, "learning_rate": 5.486207656637149e-05, "loss": 0.6655, "step": 5408 }, { "epoch": 1.0444101177833558, "grad_norm": 0.7811781764030457, "learning_rate": 5.484530572602511e-05, "loss": 0.6301, "step": 5409 }, { "epoch": 1.0446032052519791, "grad_norm": 1.069223403930664, "learning_rate": 5.482853433537464e-05, "loss": 0.6615, "step": 5410 }, { "epoch": 1.0447962927206025, "grad_norm": 1.4005804061889648, "learning_rate": 5.4811762396324885e-05, "loss": 0.5795, "step": 5411 }, { "epoch": 1.0449893801892258, "grad_norm": 1.683565378189087, "learning_rate": 5.479498991078074e-05, "loss": 0.6034, "step": 5412 }, { "epoch": 1.045182467657849, "grad_norm": 1.4694868326187134, "learning_rate": 5.477821688064714e-05, "loss": 0.5962, "step": 5413 }, { "epoch": 1.0453755551264723, "grad_norm": 1.332438588142395, "learning_rate": 5.476144330782903e-05, "loss": 0.6181, "step": 5414 }, { "epoch": 1.0455686425950956, "grad_norm": 0.8355496525764465, "learning_rate": 5.474466919423151e-05, "loss": 0.6822, "step": 5415 }, { "epoch": 1.045761730063719, "grad_norm": 0.8287819027900696, "learning_rate": 5.472789454175966e-05, "loss": 0.6296, "step": 5416 }, { "epoch": 1.045954817532342, "grad_norm": 1.2356956005096436, "learning_rate": 5.4711119352318696e-05, "loss": 0.6295, "step": 5417 }, { "epoch": 1.0461479050009654, "grad_norm": 0.6792371273040771, "learning_rate": 5.469434362781383e-05, "loss": 0.6174, "step": 5418 }, { "epoch": 1.0463409924695888, "grad_norm": 0.6451554894447327, "learning_rate": 5.467756737015034e-05, "loss": 0.5844, "step": 5419 }, { "epoch": 1.046534079938212, "grad_norm": 1.0872290134429932, "learning_rate": 5.4660790581233625e-05, "loss": 0.5864, "step": 5420 }, { "epoch": 1.0467271674068352, "grad_norm": 1.283032774925232, "learning_rate": 5.464401326296907e-05, "loss": 0.6336, "step": 5421 }, { "epoch": 1.0469202548754586, "grad_norm": 1.239339828491211, "learning_rate": 5.4627235417262165e-05, "loss": 0.5354, "step": 5422 }, { "epoch": 1.047113342344082, "grad_norm": 0.8857525587081909, "learning_rate": 5.461045704601847e-05, "loss": 0.6295, "step": 5423 }, { "epoch": 1.047306429812705, "grad_norm": 0.6944084763526917, "learning_rate": 5.4593678151143555e-05, "loss": 0.5838, "step": 5424 }, { "epoch": 1.0474995172813284, "grad_norm": 0.9660025238990784, "learning_rate": 5.457689873454309e-05, "loss": 0.6512, "step": 5425 }, { "epoch": 1.0476926047499517, "grad_norm": 1.188246488571167, "learning_rate": 5.4560118798122773e-05, "loss": 0.5647, "step": 5426 }, { "epoch": 1.047885692218575, "grad_norm": 0.9834226369857788, "learning_rate": 5.454333834378843e-05, "loss": 0.6894, "step": 5427 }, { "epoch": 1.0480787796871982, "grad_norm": 1.6262463331222534, "learning_rate": 5.452655737344583e-05, "loss": 0.5986, "step": 5428 }, { "epoch": 1.0482718671558215, "grad_norm": 1.064985752105713, "learning_rate": 5.4509775889000905e-05, "loss": 0.6035, "step": 5429 }, { "epoch": 1.048464954624445, "grad_norm": 1.1757676601409912, "learning_rate": 5.4492993892359614e-05, "loss": 0.6459, "step": 5430 }, { "epoch": 1.0486580420930682, "grad_norm": 0.916950523853302, "learning_rate": 5.4476211385427945e-05, "loss": 0.5637, "step": 5431 }, { "epoch": 1.0488511295616914, "grad_norm": 0.7332997918128967, "learning_rate": 5.445942837011197e-05, "loss": 0.6611, "step": 5432 }, { "epoch": 1.0490442170303147, "grad_norm": 0.7245256900787354, "learning_rate": 5.444264484831783e-05, "loss": 0.6386, "step": 5433 }, { "epoch": 1.049237304498938, "grad_norm": 1.3450396060943604, "learning_rate": 5.442586082195169e-05, "loss": 0.6208, "step": 5434 }, { "epoch": 1.0494303919675614, "grad_norm": 1.0543431043624878, "learning_rate": 5.440907629291979e-05, "loss": 0.6112, "step": 5435 }, { "epoch": 1.0496234794361845, "grad_norm": 1.4992390871047974, "learning_rate": 5.439229126312845e-05, "loss": 0.6235, "step": 5436 }, { "epoch": 1.0498165669048078, "grad_norm": 0.9865072965621948, "learning_rate": 5.4375505734484e-05, "loss": 0.5537, "step": 5437 }, { "epoch": 1.0500096543734312, "grad_norm": 0.793889582157135, "learning_rate": 5.435871970889288e-05, "loss": 0.6007, "step": 5438 }, { "epoch": 1.0502027418420545, "grad_norm": 0.8634839653968811, "learning_rate": 5.434193318826151e-05, "loss": 0.562, "step": 5439 }, { "epoch": 1.0503958293106777, "grad_norm": 1.6726688146591187, "learning_rate": 5.432514617449644e-05, "loss": 0.6038, "step": 5440 }, { "epoch": 1.050588916779301, "grad_norm": 0.7362019419670105, "learning_rate": 5.4308358669504276e-05, "loss": 0.6565, "step": 5441 }, { "epoch": 1.0507820042479243, "grad_norm": 0.7184159159660339, "learning_rate": 5.4291570675191624e-05, "loss": 0.5554, "step": 5442 }, { "epoch": 1.0509750917165477, "grad_norm": 1.1335511207580566, "learning_rate": 5.427478219346518e-05, "loss": 0.6157, "step": 5443 }, { "epoch": 1.0511681791851708, "grad_norm": 1.5262184143066406, "learning_rate": 5.425799322623169e-05, "loss": 0.6022, "step": 5444 }, { "epoch": 1.0513612666537941, "grad_norm": 1.1551928520202637, "learning_rate": 5.4241203775397955e-05, "loss": 0.6462, "step": 5445 }, { "epoch": 1.0515543541224175, "grad_norm": 0.7278279066085815, "learning_rate": 5.4224413842870835e-05, "loss": 0.566, "step": 5446 }, { "epoch": 1.0517474415910408, "grad_norm": 0.9523142576217651, "learning_rate": 5.4207623430557244e-05, "loss": 0.5186, "step": 5447 }, { "epoch": 1.051940529059664, "grad_norm": 1.1509027481079102, "learning_rate": 5.4190832540364144e-05, "loss": 0.6321, "step": 5448 }, { "epoch": 1.0521336165282873, "grad_norm": 1.1100612878799438, "learning_rate": 5.4174041174198545e-05, "loss": 0.6366, "step": 5449 }, { "epoch": 1.0523267039969106, "grad_norm": 0.760414719581604, "learning_rate": 5.415724933396753e-05, "loss": 0.5348, "step": 5450 }, { "epoch": 1.052519791465534, "grad_norm": 0.7458112239837646, "learning_rate": 5.414045702157824e-05, "loss": 0.6483, "step": 5451 }, { "epoch": 1.052712878934157, "grad_norm": 1.0432652235031128, "learning_rate": 5.412366423893783e-05, "loss": 0.6264, "step": 5452 }, { "epoch": 1.0529059664027804, "grad_norm": 0.8180701732635498, "learning_rate": 5.4106870987953575e-05, "loss": 0.642, "step": 5453 }, { "epoch": 1.0530990538714038, "grad_norm": 1.4850281476974487, "learning_rate": 5.409007727053271e-05, "loss": 0.5967, "step": 5454 }, { "epoch": 1.0532921413400271, "grad_norm": 0.741692304611206, "learning_rate": 5.4073283088582614e-05, "loss": 0.662, "step": 5455 }, { "epoch": 1.0534852288086503, "grad_norm": 0.7728059887886047, "learning_rate": 5.405648844401069e-05, "loss": 0.5737, "step": 5456 }, { "epoch": 1.0536783162772736, "grad_norm": 0.9100304245948792, "learning_rate": 5.403969333872434e-05, "loss": 0.6307, "step": 5457 }, { "epoch": 1.053871403745897, "grad_norm": 1.0309423208236694, "learning_rate": 5.40228977746311e-05, "loss": 0.5766, "step": 5458 }, { "epoch": 1.0540644912145203, "grad_norm": 0.787386417388916, "learning_rate": 5.4006101753638506e-05, "loss": 0.5963, "step": 5459 }, { "epoch": 1.0542575786831434, "grad_norm": 0.7050822973251343, "learning_rate": 5.3989305277654156e-05, "loss": 0.5899, "step": 5460 }, { "epoch": 1.0544506661517667, "grad_norm": 1.3091154098510742, "learning_rate": 5.3972508348585724e-05, "loss": 0.6423, "step": 5461 }, { "epoch": 1.05464375362039, "grad_norm": 2.1623034477233887, "learning_rate": 5.39557109683409e-05, "loss": 0.6057, "step": 5462 }, { "epoch": 1.0548368410890134, "grad_norm": 0.7321025729179382, "learning_rate": 5.393891313882745e-05, "loss": 0.6027, "step": 5463 }, { "epoch": 1.0550299285576366, "grad_norm": 1.377424716949463, "learning_rate": 5.392211486195318e-05, "loss": 0.6012, "step": 5464 }, { "epoch": 1.05522301602626, "grad_norm": 1.1433912515640259, "learning_rate": 5.3905316139625925e-05, "loss": 0.6074, "step": 5465 }, { "epoch": 1.0554161034948832, "grad_norm": 1.2451393604278564, "learning_rate": 5.3888516973753645e-05, "loss": 0.6066, "step": 5466 }, { "epoch": 1.0556091909635064, "grad_norm": 3.6058008670806885, "learning_rate": 5.3871717366244254e-05, "loss": 0.59, "step": 5467 }, { "epoch": 1.0558022784321297, "grad_norm": 1.0411996841430664, "learning_rate": 5.385491731900578e-05, "loss": 0.5616, "step": 5468 }, { "epoch": 1.055995365900753, "grad_norm": 0.8879721760749817, "learning_rate": 5.3838116833946294e-05, "loss": 0.62, "step": 5469 }, { "epoch": 1.0561884533693764, "grad_norm": 0.7631381154060364, "learning_rate": 5.382131591297389e-05, "loss": 0.5623, "step": 5470 }, { "epoch": 1.0563815408379995, "grad_norm": 1.20835280418396, "learning_rate": 5.3804514557996725e-05, "loss": 0.5614, "step": 5471 }, { "epoch": 1.0565746283066229, "grad_norm": 0.7985623478889465, "learning_rate": 5.378771277092302e-05, "loss": 0.5417, "step": 5472 }, { "epoch": 1.0567677157752462, "grad_norm": 0.9306768178939819, "learning_rate": 5.377091055366104e-05, "loss": 0.5895, "step": 5473 }, { "epoch": 1.0569608032438695, "grad_norm": 1.3389692306518555, "learning_rate": 5.3754107908119046e-05, "loss": 0.6471, "step": 5474 }, { "epoch": 1.0571538907124927, "grad_norm": 2.5504281520843506, "learning_rate": 5.3737304836205436e-05, "loss": 0.6396, "step": 5475 }, { "epoch": 1.057346978181116, "grad_norm": 0.6852573156356812, "learning_rate": 5.372050133982862e-05, "loss": 0.6486, "step": 5476 }, { "epoch": 1.0575400656497393, "grad_norm": 0.9795103073120117, "learning_rate": 5.3703697420897025e-05, "loss": 0.6701, "step": 5477 }, { "epoch": 1.0577331531183627, "grad_norm": 0.9883099794387817, "learning_rate": 5.3686893081319164e-05, "loss": 0.5801, "step": 5478 }, { "epoch": 1.0579262405869858, "grad_norm": 1.0864371061325073, "learning_rate": 5.3670088323003573e-05, "loss": 0.577, "step": 5479 }, { "epoch": 1.0581193280556092, "grad_norm": 1.8471506834030151, "learning_rate": 5.3653283147858854e-05, "loss": 0.6469, "step": 5480 }, { "epoch": 1.0583124155242325, "grad_norm": 0.7581863403320312, "learning_rate": 5.3636477557793663e-05, "loss": 0.667, "step": 5481 }, { "epoch": 1.0585055029928558, "grad_norm": 0.6592957973480225, "learning_rate": 5.361967155471666e-05, "loss": 0.5982, "step": 5482 }, { "epoch": 1.058698590461479, "grad_norm": 1.1425929069519043, "learning_rate": 5.360286514053658e-05, "loss": 0.62, "step": 5483 }, { "epoch": 1.0588916779301023, "grad_norm": 1.219188928604126, "learning_rate": 5.358605831716227e-05, "loss": 0.6111, "step": 5484 }, { "epoch": 1.0590847653987256, "grad_norm": 0.726974368095398, "learning_rate": 5.356925108650248e-05, "loss": 0.6446, "step": 5485 }, { "epoch": 1.059277852867349, "grad_norm": 0.6189942359924316, "learning_rate": 5.355244345046612e-05, "loss": 0.6126, "step": 5486 }, { "epoch": 1.0594709403359721, "grad_norm": 0.6204934120178223, "learning_rate": 5.353563541096213e-05, "loss": 0.5687, "step": 5487 }, { "epoch": 1.0596640278045955, "grad_norm": 0.8646949529647827, "learning_rate": 5.351882696989945e-05, "loss": 0.6031, "step": 5488 }, { "epoch": 1.0598571152732188, "grad_norm": 0.6468294858932495, "learning_rate": 5.350201812918709e-05, "loss": 0.6579, "step": 5489 }, { "epoch": 1.0600502027418421, "grad_norm": 0.5614685416221619, "learning_rate": 5.348520889073413e-05, "loss": 0.642, "step": 5490 }, { "epoch": 1.0602432902104653, "grad_norm": 0.8390681147575378, "learning_rate": 5.3468399256449676e-05, "loss": 0.6288, "step": 5491 }, { "epoch": 1.0604363776790886, "grad_norm": 2.0163278579711914, "learning_rate": 5.345158922824285e-05, "loss": 0.58, "step": 5492 }, { "epoch": 1.060629465147712, "grad_norm": 0.6310498714447021, "learning_rate": 5.343477880802287e-05, "loss": 0.5958, "step": 5493 }, { "epoch": 1.0608225526163353, "grad_norm": 0.6159307956695557, "learning_rate": 5.341796799769897e-05, "loss": 0.5879, "step": 5494 }, { "epoch": 1.0610156400849584, "grad_norm": 0.6325063109397888, "learning_rate": 5.3401156799180416e-05, "loss": 0.5832, "step": 5495 }, { "epoch": 1.0612087275535818, "grad_norm": 0.7020718455314636, "learning_rate": 5.3384345214376564e-05, "loss": 0.5812, "step": 5496 }, { "epoch": 1.061401815022205, "grad_norm": 0.9023616313934326, "learning_rate": 5.336753324519674e-05, "loss": 0.6259, "step": 5497 }, { "epoch": 1.0615949024908284, "grad_norm": 0.8794880509376526, "learning_rate": 5.335072089355041e-05, "loss": 0.6425, "step": 5498 }, { "epoch": 1.0617879899594516, "grad_norm": 1.539597511291504, "learning_rate": 5.333390816134701e-05, "loss": 0.6322, "step": 5499 }, { "epoch": 1.061981077428075, "grad_norm": 0.6451554298400879, "learning_rate": 5.331709505049602e-05, "loss": 0.5532, "step": 5500 }, { "epoch": 1.061981077428075, "eval_loss": 0.6710407733917236, "eval_runtime": 50.0567, "eval_samples_per_second": 13.265, "eval_steps_per_second": 0.42, "step": 5500 }, { "epoch": 1.0621741648966982, "grad_norm": 3.2343342304229736, "learning_rate": 5.330028156290702e-05, "loss": 0.5141, "step": 5501 }, { "epoch": 1.0623672523653216, "grad_norm": 1.8569120168685913, "learning_rate": 5.328346770048956e-05, "loss": 0.5974, "step": 5502 }, { "epoch": 1.0625603398339447, "grad_norm": 0.6022987961769104, "learning_rate": 5.3266653465153294e-05, "loss": 0.622, "step": 5503 }, { "epoch": 1.062753427302568, "grad_norm": 0.6403718590736389, "learning_rate": 5.32498388588079e-05, "loss": 0.5767, "step": 5504 }, { "epoch": 1.0629465147711914, "grad_norm": 0.5584492087364197, "learning_rate": 5.3233023883363064e-05, "loss": 0.5451, "step": 5505 }, { "epoch": 1.0631396022398145, "grad_norm": 0.7740649580955505, "learning_rate": 5.321620854072856e-05, "loss": 0.5789, "step": 5506 }, { "epoch": 1.0633326897084379, "grad_norm": 0.7779018878936768, "learning_rate": 5.319939283281419e-05, "loss": 0.6091, "step": 5507 }, { "epoch": 1.0635257771770612, "grad_norm": 0.6106635332107544, "learning_rate": 5.318257676152978e-05, "loss": 0.6459, "step": 5508 }, { "epoch": 1.0637188646456845, "grad_norm": 0.7067995071411133, "learning_rate": 5.31657603287852e-05, "loss": 0.6507, "step": 5509 }, { "epoch": 1.063911952114308, "grad_norm": 0.6863414645195007, "learning_rate": 5.31489435364904e-05, "loss": 0.5715, "step": 5510 }, { "epoch": 1.064105039582931, "grad_norm": 0.9034574031829834, "learning_rate": 5.3132126386555314e-05, "loss": 0.6669, "step": 5511 }, { "epoch": 1.0642981270515544, "grad_norm": 1.0385510921478271, "learning_rate": 5.311530888088997e-05, "loss": 0.5922, "step": 5512 }, { "epoch": 1.0644912145201777, "grad_norm": 0.6163562536239624, "learning_rate": 5.309849102140441e-05, "loss": 0.6111, "step": 5513 }, { "epoch": 1.0646843019888008, "grad_norm": 1.1066055297851562, "learning_rate": 5.308167281000868e-05, "loss": 0.5908, "step": 5514 }, { "epoch": 1.0648773894574242, "grad_norm": 0.8169314861297607, "learning_rate": 5.306485424861294e-05, "loss": 0.596, "step": 5515 }, { "epoch": 1.0650704769260475, "grad_norm": 0.7118750810623169, "learning_rate": 5.304803533912735e-05, "loss": 0.6352, "step": 5516 }, { "epoch": 1.0652635643946708, "grad_norm": 0.6440972089767456, "learning_rate": 5.3031216083462085e-05, "loss": 0.654, "step": 5517 }, { "epoch": 1.065456651863294, "grad_norm": 2.3128249645233154, "learning_rate": 5.3014396483527404e-05, "loss": 0.595, "step": 5518 }, { "epoch": 1.0656497393319173, "grad_norm": 0.6669443845748901, "learning_rate": 5.299757654123362e-05, "loss": 0.5764, "step": 5519 }, { "epoch": 1.0658428268005407, "grad_norm": 0.6664853692054749, "learning_rate": 5.2980756258490995e-05, "loss": 0.612, "step": 5520 }, { "epoch": 1.066035914269164, "grad_norm": 0.6649917960166931, "learning_rate": 5.296393563720993e-05, "loss": 0.6664, "step": 5521 }, { "epoch": 1.0662290017377871, "grad_norm": 0.7295341491699219, "learning_rate": 5.294711467930079e-05, "loss": 0.606, "step": 5522 }, { "epoch": 1.0664220892064105, "grad_norm": 0.8047571778297424, "learning_rate": 5.293029338667405e-05, "loss": 0.6404, "step": 5523 }, { "epoch": 1.0666151766750338, "grad_norm": 0.6017897129058838, "learning_rate": 5.291347176124014e-05, "loss": 0.6123, "step": 5524 }, { "epoch": 1.0668082641436571, "grad_norm": 0.8482427597045898, "learning_rate": 5.289664980490959e-05, "loss": 0.6225, "step": 5525 }, { "epoch": 1.0670013516122803, "grad_norm": 1.1359535455703735, "learning_rate": 5.287982751959296e-05, "loss": 0.661, "step": 5526 }, { "epoch": 1.0671944390809036, "grad_norm": 1.0474693775177002, "learning_rate": 5.286300490720082e-05, "loss": 0.6524, "step": 5527 }, { "epoch": 1.067387526549527, "grad_norm": 4.680508613586426, "learning_rate": 5.2846181969643796e-05, "loss": 0.6311, "step": 5528 }, { "epoch": 1.0675806140181503, "grad_norm": 2.3639817237854004, "learning_rate": 5.282935870883255e-05, "loss": 0.6446, "step": 5529 }, { "epoch": 1.0677737014867734, "grad_norm": 0.8437820076942444, "learning_rate": 5.2812535126677774e-05, "loss": 0.6227, "step": 5530 }, { "epoch": 1.0679667889553968, "grad_norm": 1.2968664169311523, "learning_rate": 5.279571122509021e-05, "loss": 0.6356, "step": 5531 }, { "epoch": 1.06815987642402, "grad_norm": 0.6805182695388794, "learning_rate": 5.27788870059806e-05, "loss": 0.5681, "step": 5532 }, { "epoch": 1.0683529638926434, "grad_norm": 0.5980876684188843, "learning_rate": 5.276206247125981e-05, "loss": 0.5981, "step": 5533 }, { "epoch": 1.0685460513612666, "grad_norm": 0.6350964307785034, "learning_rate": 5.274523762283862e-05, "loss": 0.5945, "step": 5534 }, { "epoch": 1.06873913882989, "grad_norm": 0.761654794216156, "learning_rate": 5.2728412462627916e-05, "loss": 0.6066, "step": 5535 }, { "epoch": 1.0689322262985133, "grad_norm": 0.6152109503746033, "learning_rate": 5.271158699253864e-05, "loss": 0.6461, "step": 5536 }, { "epoch": 1.0691253137671366, "grad_norm": 0.9051584005355835, "learning_rate": 5.2694761214481735e-05, "loss": 0.6208, "step": 5537 }, { "epoch": 1.0693184012357597, "grad_norm": 2.0559473037719727, "learning_rate": 5.267793513036815e-05, "loss": 0.6382, "step": 5538 }, { "epoch": 1.069511488704383, "grad_norm": 0.9207213521003723, "learning_rate": 5.2661108742108935e-05, "loss": 0.6418, "step": 5539 }, { "epoch": 1.0697045761730064, "grad_norm": 0.6866427659988403, "learning_rate": 5.2644282051615136e-05, "loss": 0.6025, "step": 5540 }, { "epoch": 1.0698976636416297, "grad_norm": 1.0241550207138062, "learning_rate": 5.2627455060797823e-05, "loss": 0.6175, "step": 5541 }, { "epoch": 1.0700907511102529, "grad_norm": 0.6270947456359863, "learning_rate": 5.2610627771568134e-05, "loss": 0.6078, "step": 5542 }, { "epoch": 1.0702838385788762, "grad_norm": 2.0108895301818848, "learning_rate": 5.2593800185837204e-05, "loss": 0.6387, "step": 5543 }, { "epoch": 1.0704769260474996, "grad_norm": 0.7586472034454346, "learning_rate": 5.257697230551626e-05, "loss": 0.5559, "step": 5544 }, { "epoch": 1.070670013516123, "grad_norm": 0.7353939414024353, "learning_rate": 5.256014413251647e-05, "loss": 0.6354, "step": 5545 }, { "epoch": 1.070863100984746, "grad_norm": 1.1696022748947144, "learning_rate": 5.254331566874912e-05, "loss": 0.6087, "step": 5546 }, { "epoch": 1.0710561884533694, "grad_norm": 0.9463365077972412, "learning_rate": 5.2526486916125494e-05, "loss": 0.5576, "step": 5547 }, { "epoch": 1.0712492759219927, "grad_norm": 1.2978571653366089, "learning_rate": 5.2509657876556916e-05, "loss": 0.6344, "step": 5548 }, { "epoch": 1.071442363390616, "grad_norm": 1.288944125175476, "learning_rate": 5.249282855195472e-05, "loss": 0.5903, "step": 5549 }, { "epoch": 1.0716354508592392, "grad_norm": 0.8101900219917297, "learning_rate": 5.247599894423031e-05, "loss": 0.6732, "step": 5550 }, { "epoch": 1.0718285383278625, "grad_norm": 1.1256251335144043, "learning_rate": 5.245916905529511e-05, "loss": 0.6492, "step": 5551 }, { "epoch": 1.0720216257964859, "grad_norm": 0.8659473061561584, "learning_rate": 5.244233888706054e-05, "loss": 0.6799, "step": 5552 }, { "epoch": 1.0722147132651092, "grad_norm": 0.7967813611030579, "learning_rate": 5.242550844143811e-05, "loss": 0.6218, "step": 5553 }, { "epoch": 1.0724078007337323, "grad_norm": 1.016240119934082, "learning_rate": 5.240867772033932e-05, "loss": 0.6104, "step": 5554 }, { "epoch": 1.0726008882023557, "grad_norm": 0.9939960241317749, "learning_rate": 5.23918467256757e-05, "loss": 0.6479, "step": 5555 }, { "epoch": 1.072793975670979, "grad_norm": 0.7429608702659607, "learning_rate": 5.2375015459358856e-05, "loss": 0.5905, "step": 5556 }, { "epoch": 1.0729870631396023, "grad_norm": 1.2987501621246338, "learning_rate": 5.2358183923300366e-05, "loss": 0.6469, "step": 5557 }, { "epoch": 1.0731801506082255, "grad_norm": 1.0917284488677979, "learning_rate": 5.234135211941189e-05, "loss": 0.6269, "step": 5558 }, { "epoch": 1.0733732380768488, "grad_norm": 0.9343789219856262, "learning_rate": 5.232452004960507e-05, "loss": 0.57, "step": 5559 }, { "epoch": 1.0735663255454722, "grad_norm": 0.9779486656188965, "learning_rate": 5.230768771579162e-05, "loss": 0.6293, "step": 5560 }, { "epoch": 1.0737594130140953, "grad_norm": 1.821465015411377, "learning_rate": 5.229085511988328e-05, "loss": 0.6186, "step": 5561 }, { "epoch": 1.0739525004827186, "grad_norm": 0.8319854140281677, "learning_rate": 5.227402226379178e-05, "loss": 0.6563, "step": 5562 }, { "epoch": 1.074145587951342, "grad_norm": 0.830203652381897, "learning_rate": 5.22571891494289e-05, "loss": 0.6502, "step": 5563 }, { "epoch": 1.0743386754199653, "grad_norm": 0.7016362547874451, "learning_rate": 5.2240355778706485e-05, "loss": 0.6066, "step": 5564 }, { "epoch": 1.0745317628885886, "grad_norm": 0.9034369587898254, "learning_rate": 5.222352215353636e-05, "loss": 0.6076, "step": 5565 }, { "epoch": 1.0747248503572118, "grad_norm": 0.862713634967804, "learning_rate": 5.220668827583041e-05, "loss": 0.5585, "step": 5566 }, { "epoch": 1.0749179378258351, "grad_norm": 0.8018540143966675, "learning_rate": 5.218985414750053e-05, "loss": 0.5891, "step": 5567 }, { "epoch": 1.0751110252944585, "grad_norm": 0.7757586240768433, "learning_rate": 5.217301977045864e-05, "loss": 0.5238, "step": 5568 }, { "epoch": 1.0753041127630816, "grad_norm": 0.887808084487915, "learning_rate": 5.215618514661674e-05, "loss": 0.6249, "step": 5569 }, { "epoch": 1.075497200231705, "grad_norm": 0.7986299991607666, "learning_rate": 5.213935027788678e-05, "loss": 0.499, "step": 5570 }, { "epoch": 1.0756902877003283, "grad_norm": 0.8749318718910217, "learning_rate": 5.212251516618077e-05, "loss": 0.5719, "step": 5571 }, { "epoch": 1.0758833751689516, "grad_norm": 0.9523884057998657, "learning_rate": 5.210567981341078e-05, "loss": 0.6191, "step": 5572 }, { "epoch": 1.0760764626375747, "grad_norm": 1.0735949277877808, "learning_rate": 5.2088844221488865e-05, "loss": 0.5996, "step": 5573 }, { "epoch": 1.076269550106198, "grad_norm": 0.9185238480567932, "learning_rate": 5.207200839232712e-05, "loss": 0.6016, "step": 5574 }, { "epoch": 1.0764626375748214, "grad_norm": 0.9695579409599304, "learning_rate": 5.205517232783768e-05, "loss": 0.6011, "step": 5575 }, { "epoch": 1.0766557250434448, "grad_norm": 0.8373243808746338, "learning_rate": 5.2038336029932685e-05, "loss": 0.6383, "step": 5576 }, { "epoch": 1.0768488125120679, "grad_norm": 1.0147756338119507, "learning_rate": 5.2021499500524305e-05, "loss": 0.6687, "step": 5577 }, { "epoch": 1.0770418999806912, "grad_norm": 1.9182273149490356, "learning_rate": 5.200466274152476e-05, "loss": 0.6755, "step": 5578 }, { "epoch": 1.0772349874493146, "grad_norm": 0.8926396369934082, "learning_rate": 5.19878257548463e-05, "loss": 0.4898, "step": 5579 }, { "epoch": 1.077428074917938, "grad_norm": 1.085227370262146, "learning_rate": 5.197098854240112e-05, "loss": 0.671, "step": 5580 }, { "epoch": 1.077621162386561, "grad_norm": 0.9610598087310791, "learning_rate": 5.1954151106101547e-05, "loss": 0.6746, "step": 5581 }, { "epoch": 1.0778142498551844, "grad_norm": 1.433364748954773, "learning_rate": 5.193731344785987e-05, "loss": 0.5895, "step": 5582 }, { "epoch": 1.0780073373238077, "grad_norm": 0.9933367371559143, "learning_rate": 5.192047556958845e-05, "loss": 0.6342, "step": 5583 }, { "epoch": 1.078200424792431, "grad_norm": 1.7212146520614624, "learning_rate": 5.1903637473199594e-05, "loss": 0.6322, "step": 5584 }, { "epoch": 1.0783935122610542, "grad_norm": 0.8956708908081055, "learning_rate": 5.188679916060572e-05, "loss": 0.6528, "step": 5585 }, { "epoch": 1.0785865997296775, "grad_norm": 0.9117361307144165, "learning_rate": 5.1869960633719226e-05, "loss": 0.6725, "step": 5586 }, { "epoch": 1.0787796871983009, "grad_norm": 1.0603057146072388, "learning_rate": 5.185312189445255e-05, "loss": 0.5971, "step": 5587 }, { "epoch": 1.0789727746669242, "grad_norm": 0.794121265411377, "learning_rate": 5.183628294471812e-05, "loss": 0.6282, "step": 5588 }, { "epoch": 1.0791658621355473, "grad_norm": 0.8757795691490173, "learning_rate": 5.181944378642842e-05, "loss": 0.6141, "step": 5589 }, { "epoch": 1.0793589496041707, "grad_norm": 1.0397666692733765, "learning_rate": 5.1802604421496004e-05, "loss": 0.5782, "step": 5590 }, { "epoch": 1.079552037072794, "grad_norm": 0.852088212966919, "learning_rate": 5.178576485183333e-05, "loss": 0.6742, "step": 5591 }, { "epoch": 1.0797451245414174, "grad_norm": 0.68788081407547, "learning_rate": 5.176892507935297e-05, "loss": 0.5322, "step": 5592 }, { "epoch": 1.0799382120100405, "grad_norm": 0.7435106039047241, "learning_rate": 5.175208510596751e-05, "loss": 0.6549, "step": 5593 }, { "epoch": 1.0801312994786638, "grad_norm": 0.8433083891868591, "learning_rate": 5.1735244933589536e-05, "loss": 0.6636, "step": 5594 }, { "epoch": 1.0803243869472872, "grad_norm": 0.7885022759437561, "learning_rate": 5.171840456413165e-05, "loss": 0.5892, "step": 5595 }, { "epoch": 1.0805174744159105, "grad_norm": 0.8669980764389038, "learning_rate": 5.1701563999506533e-05, "loss": 0.6254, "step": 5596 }, { "epoch": 1.0807105618845336, "grad_norm": 0.7232045531272888, "learning_rate": 5.16847232416268e-05, "loss": 0.7169, "step": 5597 }, { "epoch": 1.080903649353157, "grad_norm": 0.7391890287399292, "learning_rate": 5.166788229240514e-05, "loss": 0.6172, "step": 5598 }, { "epoch": 1.0810967368217803, "grad_norm": 1.1969655752182007, "learning_rate": 5.16510411537543e-05, "loss": 0.6317, "step": 5599 }, { "epoch": 1.0812898242904037, "grad_norm": 1.283889889717102, "learning_rate": 5.163419982758697e-05, "loss": 0.5978, "step": 5600 }, { "epoch": 1.0814829117590268, "grad_norm": 1.718472957611084, "learning_rate": 5.161735831581591e-05, "loss": 0.6126, "step": 5601 }, { "epoch": 1.0816759992276501, "grad_norm": 1.1134477853775024, "learning_rate": 5.160051662035389e-05, "loss": 0.5911, "step": 5602 }, { "epoch": 1.0818690866962735, "grad_norm": 1.0136486291885376, "learning_rate": 5.1583674743113674e-05, "loss": 0.6045, "step": 5603 }, { "epoch": 1.0820621741648968, "grad_norm": 1.161729335784912, "learning_rate": 5.156683268600814e-05, "loss": 0.6112, "step": 5604 }, { "epoch": 1.08225526163352, "grad_norm": 0.9001469612121582, "learning_rate": 5.154999045095003e-05, "loss": 0.6004, "step": 5605 }, { "epoch": 1.0824483491021433, "grad_norm": 1.0108482837677002, "learning_rate": 5.1533148039852264e-05, "loss": 0.6366, "step": 5606 }, { "epoch": 1.0826414365707666, "grad_norm": 1.45576012134552, "learning_rate": 5.15163054546277e-05, "loss": 0.636, "step": 5607 }, { "epoch": 1.0828345240393897, "grad_norm": 0.9102839231491089, "learning_rate": 5.149946269718921e-05, "loss": 0.5598, "step": 5608 }, { "epoch": 1.083027611508013, "grad_norm": 1.5525407791137695, "learning_rate": 5.14826197694497e-05, "loss": 0.6905, "step": 5609 }, { "epoch": 1.0832206989766364, "grad_norm": 0.8376258611679077, "learning_rate": 5.146577667332214e-05, "loss": 0.6324, "step": 5610 }, { "epoch": 1.0834137864452598, "grad_norm": 0.721899688243866, "learning_rate": 5.1448933410719444e-05, "loss": 0.6278, "step": 5611 }, { "epoch": 1.083606873913883, "grad_norm": 1.8678475618362427, "learning_rate": 5.1432089983554596e-05, "loss": 0.5869, "step": 5612 }, { "epoch": 1.0837999613825062, "grad_norm": 0.6904691457748413, "learning_rate": 5.1415246393740576e-05, "loss": 0.6124, "step": 5613 }, { "epoch": 1.0839930488511296, "grad_norm": 1.0531914234161377, "learning_rate": 5.1398402643190394e-05, "loss": 0.6165, "step": 5614 }, { "epoch": 1.084186136319753, "grad_norm": 1.0430513620376587, "learning_rate": 5.138155873381706e-05, "loss": 0.6326, "step": 5615 }, { "epoch": 1.084379223788376, "grad_norm": 1.036783218383789, "learning_rate": 5.136471466753364e-05, "loss": 0.6322, "step": 5616 }, { "epoch": 1.0845723112569994, "grad_norm": 0.6236552596092224, "learning_rate": 5.1347870446253164e-05, "loss": 0.5989, "step": 5617 }, { "epoch": 1.0847653987256227, "grad_norm": 0.5817725658416748, "learning_rate": 5.133102607188874e-05, "loss": 0.589, "step": 5618 }, { "epoch": 1.084958486194246, "grad_norm": 0.7134684324264526, "learning_rate": 5.1314181546353455e-05, "loss": 0.644, "step": 5619 }, { "epoch": 1.0851515736628692, "grad_norm": 0.6606240272521973, "learning_rate": 5.12973368715604e-05, "loss": 0.5808, "step": 5620 }, { "epoch": 1.0853446611314925, "grad_norm": 0.81355220079422, "learning_rate": 5.1280492049422715e-05, "loss": 0.6413, "step": 5621 }, { "epoch": 1.0855377486001159, "grad_norm": 1.5001274347305298, "learning_rate": 5.126364708185356e-05, "loss": 0.6268, "step": 5622 }, { "epoch": 1.0857308360687392, "grad_norm": 1.2553261518478394, "learning_rate": 5.1246801970766065e-05, "loss": 0.5657, "step": 5623 }, { "epoch": 1.0859239235373623, "grad_norm": 0.796126127243042, "learning_rate": 5.1229956718073444e-05, "loss": 0.6601, "step": 5624 }, { "epoch": 1.0861170110059857, "grad_norm": 0.8990170955657959, "learning_rate": 5.121311132568888e-05, "loss": 0.6327, "step": 5625 }, { "epoch": 1.086310098474609, "grad_norm": 2.820805788040161, "learning_rate": 5.1196265795525566e-05, "loss": 0.6458, "step": 5626 }, { "epoch": 1.0865031859432324, "grad_norm": 0.8722284436225891, "learning_rate": 5.117942012949675e-05, "loss": 0.5992, "step": 5627 }, { "epoch": 1.0866962734118555, "grad_norm": 0.8062961101531982, "learning_rate": 5.116257432951564e-05, "loss": 0.6401, "step": 5628 }, { "epoch": 1.0868893608804788, "grad_norm": 0.6954826712608337, "learning_rate": 5.114572839749555e-05, "loss": 0.5776, "step": 5629 }, { "epoch": 1.0870824483491022, "grad_norm": 0.6839549541473389, "learning_rate": 5.1128882335349704e-05, "loss": 0.6275, "step": 5630 }, { "epoch": 1.0872755358177255, "grad_norm": 0.6095412373542786, "learning_rate": 5.111203614499139e-05, "loss": 0.5906, "step": 5631 }, { "epoch": 1.0874686232863486, "grad_norm": 1.2774391174316406, "learning_rate": 5.109518982833393e-05, "loss": 0.6106, "step": 5632 }, { "epoch": 1.087661710754972, "grad_norm": 0.6943067312240601, "learning_rate": 5.107834338729064e-05, "loss": 0.607, "step": 5633 }, { "epoch": 1.0878547982235953, "grad_norm": 0.7462583184242249, "learning_rate": 5.106149682377481e-05, "loss": 0.6361, "step": 5634 }, { "epoch": 1.0880478856922187, "grad_norm": 0.7970682382583618, "learning_rate": 5.1044650139699835e-05, "loss": 0.672, "step": 5635 }, { "epoch": 1.0882409731608418, "grad_norm": 0.8401150703430176, "learning_rate": 5.102780333697904e-05, "loss": 0.6079, "step": 5636 }, { "epoch": 1.0884340606294651, "grad_norm": 0.8462549448013306, "learning_rate": 5.10109564175258e-05, "loss": 0.5995, "step": 5637 }, { "epoch": 1.0886271480980885, "grad_norm": 0.6691082715988159, "learning_rate": 5.0994109383253506e-05, "loss": 0.6813, "step": 5638 }, { "epoch": 1.0888202355667118, "grad_norm": 0.7567232847213745, "learning_rate": 5.097726223607558e-05, "loss": 0.648, "step": 5639 }, { "epoch": 1.089013323035335, "grad_norm": 0.9000946879386902, "learning_rate": 5.096041497790536e-05, "loss": 0.6438, "step": 5640 }, { "epoch": 1.0892064105039583, "grad_norm": 0.5813236832618713, "learning_rate": 5.0943567610656315e-05, "loss": 0.5896, "step": 5641 }, { "epoch": 1.0893994979725816, "grad_norm": 1.5072883367538452, "learning_rate": 5.092672013624189e-05, "loss": 0.6331, "step": 5642 }, { "epoch": 1.089592585441205, "grad_norm": 1.1435502767562866, "learning_rate": 5.090987255657552e-05, "loss": 0.6185, "step": 5643 }, { "epoch": 1.089785672909828, "grad_norm": 0.7958948612213135, "learning_rate": 5.0893024873570646e-05, "loss": 0.5232, "step": 5644 }, { "epoch": 1.0899787603784514, "grad_norm": 1.588551640510559, "learning_rate": 5.087617708914076e-05, "loss": 0.5943, "step": 5645 }, { "epoch": 1.0901718478470748, "grad_norm": 1.246178388595581, "learning_rate": 5.0859329205199344e-05, "loss": 0.5813, "step": 5646 }, { "epoch": 1.0903649353156981, "grad_norm": 1.0455067157745361, "learning_rate": 5.084248122365988e-05, "loss": 0.624, "step": 5647 }, { "epoch": 1.0905580227843212, "grad_norm": 1.1188997030258179, "learning_rate": 5.082563314643587e-05, "loss": 0.609, "step": 5648 }, { "epoch": 1.0907511102529446, "grad_norm": 0.7959558367729187, "learning_rate": 5.080878497544083e-05, "loss": 0.5959, "step": 5649 }, { "epoch": 1.090944197721568, "grad_norm": 1.4643975496292114, "learning_rate": 5.079193671258831e-05, "loss": 0.672, "step": 5650 }, { "epoch": 1.0911372851901913, "grad_norm": 1.6170071363449097, "learning_rate": 5.07750883597918e-05, "loss": 0.6388, "step": 5651 }, { "epoch": 1.0913303726588144, "grad_norm": 0.9704468250274658, "learning_rate": 5.075823991896489e-05, "loss": 0.6674, "step": 5652 }, { "epoch": 1.0915234601274377, "grad_norm": 0.9183454513549805, "learning_rate": 5.074139139202112e-05, "loss": 0.678, "step": 5653 }, { "epoch": 1.091716547596061, "grad_norm": 0.8644444942474365, "learning_rate": 5.072454278087405e-05, "loss": 0.6021, "step": 5654 }, { "epoch": 1.0919096350646842, "grad_norm": 1.4727520942687988, "learning_rate": 5.070769408743725e-05, "loss": 0.6321, "step": 5655 }, { "epoch": 1.0921027225333075, "grad_norm": 0.8977383971214294, "learning_rate": 5.069084531362433e-05, "loss": 0.5919, "step": 5656 }, { "epoch": 1.0922958100019309, "grad_norm": 0.7986342310905457, "learning_rate": 5.067399646134888e-05, "loss": 0.6241, "step": 5657 }, { "epoch": 1.0924888974705542, "grad_norm": 0.9239400625228882, "learning_rate": 5.0657147532524486e-05, "loss": 0.6049, "step": 5658 }, { "epoch": 1.0926819849391776, "grad_norm": 0.667156457901001, "learning_rate": 5.0640298529064766e-05, "loss": 0.6149, "step": 5659 }, { "epoch": 1.0928750724078007, "grad_norm": 0.8124087452888489, "learning_rate": 5.0623449452883344e-05, "loss": 0.6434, "step": 5660 }, { "epoch": 1.093068159876424, "grad_norm": 0.6498686671257019, "learning_rate": 5.060660030589384e-05, "loss": 0.5967, "step": 5661 }, { "epoch": 1.0932612473450474, "grad_norm": 0.672275960445404, "learning_rate": 5.0589751090009896e-05, "loss": 0.6997, "step": 5662 }, { "epoch": 1.0934543348136705, "grad_norm": 0.7496996521949768, "learning_rate": 5.057290180714516e-05, "loss": 0.622, "step": 5663 }, { "epoch": 1.0936474222822938, "grad_norm": 1.0585957765579224, "learning_rate": 5.0556052459213285e-05, "loss": 0.5995, "step": 5664 }, { "epoch": 1.0938405097509172, "grad_norm": 1.196480393409729, "learning_rate": 5.0539203048127925e-05, "loss": 0.6201, "step": 5665 }, { "epoch": 1.0940335972195405, "grad_norm": 0.9299960136413574, "learning_rate": 5.052235357580274e-05, "loss": 0.6274, "step": 5666 }, { "epoch": 1.0942266846881636, "grad_norm": 0.8454006910324097, "learning_rate": 5.0505504044151434e-05, "loss": 0.6002, "step": 5667 }, { "epoch": 1.094419772156787, "grad_norm": 0.6837853789329529, "learning_rate": 5.0488654455087645e-05, "loss": 0.5744, "step": 5668 }, { "epoch": 1.0946128596254103, "grad_norm": 0.8590264916419983, "learning_rate": 5.047180481052508e-05, "loss": 0.607, "step": 5669 }, { "epoch": 1.0948059470940337, "grad_norm": 0.6900911927223206, "learning_rate": 5.045495511237745e-05, "loss": 0.6404, "step": 5670 }, { "epoch": 1.0949990345626568, "grad_norm": 0.9711814522743225, "learning_rate": 5.043810536255842e-05, "loss": 0.6627, "step": 5671 }, { "epoch": 1.0951921220312801, "grad_norm": 0.7527381777763367, "learning_rate": 5.0421255562981703e-05, "loss": 0.65, "step": 5672 }, { "epoch": 1.0953852094999035, "grad_norm": 1.2007620334625244, "learning_rate": 5.040440571556103e-05, "loss": 0.5922, "step": 5673 }, { "epoch": 1.0955782969685268, "grad_norm": 0.8006431460380554, "learning_rate": 5.03875558222101e-05, "loss": 0.6089, "step": 5674 }, { "epoch": 1.09577138443715, "grad_norm": 0.8519790172576904, "learning_rate": 5.037070588484265e-05, "loss": 0.6343, "step": 5675 }, { "epoch": 1.0959644719057733, "grad_norm": 0.928830087184906, "learning_rate": 5.035385590537239e-05, "loss": 0.6432, "step": 5676 }, { "epoch": 1.0961575593743966, "grad_norm": 0.8984124064445496, "learning_rate": 5.033700588571305e-05, "loss": 0.5887, "step": 5677 }, { "epoch": 1.09635064684302, "grad_norm": 0.9734708666801453, "learning_rate": 5.032015582777838e-05, "loss": 0.6382, "step": 5678 }, { "epoch": 1.096543734311643, "grad_norm": 0.8694807887077332, "learning_rate": 5.030330573348212e-05, "loss": 0.6431, "step": 5679 }, { "epoch": 1.0967368217802664, "grad_norm": 0.7138782143592834, "learning_rate": 5.0286455604737984e-05, "loss": 0.5895, "step": 5680 }, { "epoch": 1.0969299092488898, "grad_norm": 0.7284506559371948, "learning_rate": 5.0269605443459754e-05, "loss": 0.5767, "step": 5681 }, { "epoch": 1.0971229967175131, "grad_norm": 0.8908578157424927, "learning_rate": 5.025275525156118e-05, "loss": 0.6058, "step": 5682 }, { "epoch": 1.0973160841861362, "grad_norm": 0.8414614796638489, "learning_rate": 5.0235905030955996e-05, "loss": 0.622, "step": 5683 }, { "epoch": 1.0975091716547596, "grad_norm": 1.378554105758667, "learning_rate": 5.0219054783557974e-05, "loss": 0.6278, "step": 5684 }, { "epoch": 1.097702259123383, "grad_norm": 0.830093502998352, "learning_rate": 5.020220451128088e-05, "loss": 0.6164, "step": 5685 }, { "epoch": 1.0978953465920063, "grad_norm": 0.5137494206428528, "learning_rate": 5.018535421603846e-05, "loss": 0.6513, "step": 5686 }, { "epoch": 1.0980884340606294, "grad_norm": 0.7625126242637634, "learning_rate": 5.016850389974451e-05, "loss": 0.6025, "step": 5687 }, { "epoch": 1.0982815215292527, "grad_norm": 0.7078133225440979, "learning_rate": 5.0151653564312765e-05, "loss": 0.6123, "step": 5688 }, { "epoch": 1.098474608997876, "grad_norm": 0.6895650029182434, "learning_rate": 5.0134803211657035e-05, "loss": 0.5643, "step": 5689 }, { "epoch": 1.0986676964664994, "grad_norm": 0.7515071034431458, "learning_rate": 5.011795284369105e-05, "loss": 0.5737, "step": 5690 }, { "epoch": 1.0988607839351225, "grad_norm": 1.132092833518982, "learning_rate": 5.010110246232862e-05, "loss": 0.629, "step": 5691 }, { "epoch": 1.0990538714037459, "grad_norm": 1.0041265487670898, "learning_rate": 5.008425206948351e-05, "loss": 0.5826, "step": 5692 }, { "epoch": 1.0992469588723692, "grad_norm": 0.9411069750785828, "learning_rate": 5.0067401667069505e-05, "loss": 0.6024, "step": 5693 }, { "epoch": 1.0994400463409926, "grad_norm": 0.8761916756629944, "learning_rate": 5.005055125700036e-05, "loss": 0.6334, "step": 5694 }, { "epoch": 1.0996331338096157, "grad_norm": 0.8068513870239258, "learning_rate": 5.0033700841189865e-05, "loss": 0.6786, "step": 5695 }, { "epoch": 1.099826221278239, "grad_norm": 0.5738810896873474, "learning_rate": 5.001685042155184e-05, "loss": 0.6619, "step": 5696 }, { "epoch": 1.1000193087468624, "grad_norm": 0.571556031703949, "learning_rate": 5e-05, "loss": 0.5571, "step": 5697 }, { "epoch": 1.1002123962154857, "grad_norm": 0.8336613774299622, "learning_rate": 4.998314957844817e-05, "loss": 0.5948, "step": 5698 }, { "epoch": 1.1004054836841088, "grad_norm": 0.6400079131126404, "learning_rate": 4.996629915881013e-05, "loss": 0.615, "step": 5699 }, { "epoch": 1.1005985711527322, "grad_norm": 1.1318097114562988, "learning_rate": 4.994944874299967e-05, "loss": 0.5888, "step": 5700 }, { "epoch": 1.1007916586213555, "grad_norm": 0.8184827566146851, "learning_rate": 4.9932598332930526e-05, "loss": 0.6182, "step": 5701 }, { "epoch": 1.1009847460899786, "grad_norm": 0.9905799031257629, "learning_rate": 4.99157479305165e-05, "loss": 0.6666, "step": 5702 }, { "epoch": 1.101177833558602, "grad_norm": 1.0096449851989746, "learning_rate": 4.989889753767139e-05, "loss": 0.5944, "step": 5703 }, { "epoch": 1.1013709210272253, "grad_norm": 0.6565203070640564, "learning_rate": 4.9882047156308956e-05, "loss": 0.6089, "step": 5704 }, { "epoch": 1.1015640084958487, "grad_norm": 0.5746813416481018, "learning_rate": 4.986519678834297e-05, "loss": 0.6046, "step": 5705 }, { "epoch": 1.101757095964472, "grad_norm": 1.157055139541626, "learning_rate": 4.984834643568724e-05, "loss": 0.6298, "step": 5706 }, { "epoch": 1.1019501834330951, "grad_norm": 0.6849783062934875, "learning_rate": 4.983149610025551e-05, "loss": 0.5788, "step": 5707 }, { "epoch": 1.1021432709017185, "grad_norm": 0.7658137679100037, "learning_rate": 4.9814645783961545e-05, "loss": 0.5692, "step": 5708 }, { "epoch": 1.1023363583703418, "grad_norm": 0.5969563126564026, "learning_rate": 4.979779548871913e-05, "loss": 0.5497, "step": 5709 }, { "epoch": 1.102529445838965, "grad_norm": 1.135384202003479, "learning_rate": 4.978094521644203e-05, "loss": 0.5496, "step": 5710 }, { "epoch": 1.1027225333075883, "grad_norm": 2.0559723377227783, "learning_rate": 4.976409496904401e-05, "loss": 0.6563, "step": 5711 }, { "epoch": 1.1029156207762116, "grad_norm": 1.0284804105758667, "learning_rate": 4.9747244748438845e-05, "loss": 0.6244, "step": 5712 }, { "epoch": 1.103108708244835, "grad_norm": 5.2164435386657715, "learning_rate": 4.973039455654025e-05, "loss": 0.5844, "step": 5713 }, { "epoch": 1.103301795713458, "grad_norm": 0.5985607504844666, "learning_rate": 4.971354439526202e-05, "loss": 0.59, "step": 5714 }, { "epoch": 1.1034948831820814, "grad_norm": 0.7072614431381226, "learning_rate": 4.96966942665179e-05, "loss": 0.6484, "step": 5715 }, { "epoch": 1.1036879706507048, "grad_norm": 0.658356785774231, "learning_rate": 4.967984417222163e-05, "loss": 0.6457, "step": 5716 }, { "epoch": 1.1038810581193281, "grad_norm": 3.1186912059783936, "learning_rate": 4.9662994114286956e-05, "loss": 0.6278, "step": 5717 }, { "epoch": 1.1040741455879513, "grad_norm": 0.8857901692390442, "learning_rate": 4.9646144094627633e-05, "loss": 0.6297, "step": 5718 }, { "epoch": 1.1042672330565746, "grad_norm": 1.3249597549438477, "learning_rate": 4.962929411515737e-05, "loss": 0.5839, "step": 5719 }, { "epoch": 1.104460320525198, "grad_norm": 0.6856987476348877, "learning_rate": 4.9612444177789905e-05, "loss": 0.6059, "step": 5720 }, { "epoch": 1.1046534079938213, "grad_norm": 0.6137818098068237, "learning_rate": 4.9595594284438977e-05, "loss": 0.6056, "step": 5721 }, { "epoch": 1.1048464954624444, "grad_norm": 0.6594364047050476, "learning_rate": 4.9578744437018295e-05, "loss": 0.6, "step": 5722 }, { "epoch": 1.1050395829310677, "grad_norm": 1.7133270502090454, "learning_rate": 4.9561894637441605e-05, "loss": 0.6491, "step": 5723 }, { "epoch": 1.105232670399691, "grad_norm": 0.688226580619812, "learning_rate": 4.9545044887622584e-05, "loss": 0.6376, "step": 5724 }, { "epoch": 1.1054257578683144, "grad_norm": 0.5229693651199341, "learning_rate": 4.952819518947493e-05, "loss": 0.5327, "step": 5725 }, { "epoch": 1.1056188453369376, "grad_norm": 0.7125344276428223, "learning_rate": 4.9511345544912366e-05, "loss": 0.6091, "step": 5726 }, { "epoch": 1.105811932805561, "grad_norm": 0.8561886548995972, "learning_rate": 4.9494495955848585e-05, "loss": 0.6483, "step": 5727 }, { "epoch": 1.1060050202741842, "grad_norm": 1.5900074243545532, "learning_rate": 4.947764642419725e-05, "loss": 0.6022, "step": 5728 }, { "epoch": 1.1061981077428076, "grad_norm": 1.0109978914260864, "learning_rate": 4.9460796951872094e-05, "loss": 0.6011, "step": 5729 }, { "epoch": 1.1063911952114307, "grad_norm": 0.7560641169548035, "learning_rate": 4.9443947540786726e-05, "loss": 0.609, "step": 5730 }, { "epoch": 1.106584282680054, "grad_norm": 0.759253978729248, "learning_rate": 4.9427098192854854e-05, "loss": 0.5663, "step": 5731 }, { "epoch": 1.1067773701486774, "grad_norm": 1.2604197263717651, "learning_rate": 4.941024890999011e-05, "loss": 0.5897, "step": 5732 }, { "epoch": 1.1069704576173007, "grad_norm": 0.9428926110267639, "learning_rate": 4.939339969410616e-05, "loss": 0.6503, "step": 5733 }, { "epoch": 1.1071635450859239, "grad_norm": 0.7420798540115356, "learning_rate": 4.937655054711667e-05, "loss": 0.63, "step": 5734 }, { "epoch": 1.1073566325545472, "grad_norm": 4.275299549102783, "learning_rate": 4.935970147093525e-05, "loss": 0.5854, "step": 5735 }, { "epoch": 1.1075497200231705, "grad_norm": 0.7819852232933044, "learning_rate": 4.934285246747553e-05, "loss": 0.6023, "step": 5736 }, { "epoch": 1.1077428074917939, "grad_norm": 0.5901384949684143, "learning_rate": 4.9326003538651124e-05, "loss": 0.6295, "step": 5737 }, { "epoch": 1.107935894960417, "grad_norm": 0.5656280517578125, "learning_rate": 4.930915468637567e-05, "loss": 0.5618, "step": 5738 }, { "epoch": 1.1081289824290403, "grad_norm": 0.6767371296882629, "learning_rate": 4.929230591256274e-05, "loss": 0.6476, "step": 5739 }, { "epoch": 1.1083220698976637, "grad_norm": 0.7470163702964783, "learning_rate": 4.927545721912597e-05, "loss": 0.6603, "step": 5740 }, { "epoch": 1.108515157366287, "grad_norm": 0.709858775138855, "learning_rate": 4.9258608607978896e-05, "loss": 0.6197, "step": 5741 }, { "epoch": 1.1087082448349102, "grad_norm": 0.9578419923782349, "learning_rate": 4.9241760081035124e-05, "loss": 0.6808, "step": 5742 }, { "epoch": 1.1089013323035335, "grad_norm": 0.5976963639259338, "learning_rate": 4.9224911640208203e-05, "loss": 0.5434, "step": 5743 }, { "epoch": 1.1090944197721568, "grad_norm": 0.8335856795310974, "learning_rate": 4.92080632874117e-05, "loss": 0.6215, "step": 5744 }, { "epoch": 1.1092875072407802, "grad_norm": 0.6987588405609131, "learning_rate": 4.919121502455917e-05, "loss": 0.5869, "step": 5745 }, { "epoch": 1.1094805947094033, "grad_norm": 0.9601340889930725, "learning_rate": 4.9174366853564156e-05, "loss": 0.613, "step": 5746 }, { "epoch": 1.1096736821780266, "grad_norm": 0.6771526336669922, "learning_rate": 4.915751877634014e-05, "loss": 0.5745, "step": 5747 }, { "epoch": 1.10986676964665, "grad_norm": 0.9393360018730164, "learning_rate": 4.914067079480067e-05, "loss": 0.6055, "step": 5748 }, { "epoch": 1.110059857115273, "grad_norm": 0.7039125561714172, "learning_rate": 4.9123822910859244e-05, "loss": 0.5963, "step": 5749 }, { "epoch": 1.1102529445838965, "grad_norm": 0.9743505120277405, "learning_rate": 4.910697512642936e-05, "loss": 0.6013, "step": 5750 }, { "epoch": 1.1104460320525198, "grad_norm": 0.5723175406455994, "learning_rate": 4.909012744342448e-05, "loss": 0.537, "step": 5751 }, { "epoch": 1.1106391195211431, "grad_norm": 1.0512562990188599, "learning_rate": 4.907327986375812e-05, "loss": 0.6533, "step": 5752 }, { "epoch": 1.1108322069897665, "grad_norm": 0.608599066734314, "learning_rate": 4.9056432389343696e-05, "loss": 0.6125, "step": 5753 }, { "epoch": 1.1110252944583896, "grad_norm": 0.5750910639762878, "learning_rate": 4.9039585022094654e-05, "loss": 0.6208, "step": 5754 }, { "epoch": 1.111218381927013, "grad_norm": 0.8772716522216797, "learning_rate": 4.902273776392444e-05, "loss": 0.6557, "step": 5755 }, { "epoch": 1.1114114693956363, "grad_norm": 1.0191973447799683, "learning_rate": 4.900589061674649e-05, "loss": 0.672, "step": 5756 }, { "epoch": 1.1116045568642594, "grad_norm": 0.6272792220115662, "learning_rate": 4.898904358247419e-05, "loss": 0.6166, "step": 5757 }, { "epoch": 1.1117976443328828, "grad_norm": 0.9048272371292114, "learning_rate": 4.8972196663020976e-05, "loss": 0.6542, "step": 5758 }, { "epoch": 1.111990731801506, "grad_norm": 1.0169318914413452, "learning_rate": 4.8955349860300176e-05, "loss": 0.5584, "step": 5759 }, { "epoch": 1.1121838192701294, "grad_norm": 1.2159329652786255, "learning_rate": 4.89385031762252e-05, "loss": 0.6746, "step": 5760 }, { "epoch": 1.1123769067387526, "grad_norm": 0.6290955543518066, "learning_rate": 4.8921656612709376e-05, "loss": 0.5904, "step": 5761 }, { "epoch": 1.112569994207376, "grad_norm": 0.877598226070404, "learning_rate": 4.8904810171666074e-05, "loss": 0.6155, "step": 5762 }, { "epoch": 1.1127630816759992, "grad_norm": 1.1293375492095947, "learning_rate": 4.8887963855008625e-05, "loss": 0.6522, "step": 5763 }, { "epoch": 1.1129561691446226, "grad_norm": 0.6034470796585083, "learning_rate": 4.8871117664650315e-05, "loss": 0.6239, "step": 5764 }, { "epoch": 1.1131492566132457, "grad_norm": 0.8266630172729492, "learning_rate": 4.885427160250447e-05, "loss": 0.619, "step": 5765 }, { "epoch": 1.113342344081869, "grad_norm": 1.2617192268371582, "learning_rate": 4.883742567048436e-05, "loss": 0.6531, "step": 5766 }, { "epoch": 1.1135354315504924, "grad_norm": 0.7844011187553406, "learning_rate": 4.882057987050326e-05, "loss": 0.5542, "step": 5767 }, { "epoch": 1.1137285190191157, "grad_norm": 0.5317338109016418, "learning_rate": 4.880373420447443e-05, "loss": 0.623, "step": 5768 }, { "epoch": 1.1139216064877389, "grad_norm": 1.0491551160812378, "learning_rate": 4.878688867431114e-05, "loss": 0.5868, "step": 5769 }, { "epoch": 1.1141146939563622, "grad_norm": 0.971523642539978, "learning_rate": 4.877004328192657e-05, "loss": 0.5761, "step": 5770 }, { "epoch": 1.1143077814249855, "grad_norm": 1.2287952899932861, "learning_rate": 4.875319802923394e-05, "loss": 0.6997, "step": 5771 }, { "epoch": 1.1145008688936089, "grad_norm": 0.7740588188171387, "learning_rate": 4.873635291814645e-05, "loss": 0.5763, "step": 5772 }, { "epoch": 1.114693956362232, "grad_norm": 1.4761898517608643, "learning_rate": 4.871950795057729e-05, "loss": 0.6587, "step": 5773 }, { "epoch": 1.1148870438308554, "grad_norm": 0.8916496634483337, "learning_rate": 4.8702663128439606e-05, "loss": 0.5682, "step": 5774 }, { "epoch": 1.1150801312994787, "grad_norm": 1.3254040479660034, "learning_rate": 4.868581845364657e-05, "loss": 0.6731, "step": 5775 }, { "epoch": 1.115273218768102, "grad_norm": 0.740520715713501, "learning_rate": 4.866897392811126e-05, "loss": 0.6346, "step": 5776 }, { "epoch": 1.1154663062367252, "grad_norm": 0.695431649684906, "learning_rate": 4.865212955374685e-05, "loss": 0.6611, "step": 5777 }, { "epoch": 1.1156593937053485, "grad_norm": 0.7658426761627197, "learning_rate": 4.863528533246637e-05, "loss": 0.5739, "step": 5778 }, { "epoch": 1.1158524811739718, "grad_norm": 1.563402771949768, "learning_rate": 4.861844126618294e-05, "loss": 0.6624, "step": 5779 }, { "epoch": 1.1160455686425952, "grad_norm": 0.4876270294189453, "learning_rate": 4.860159735680961e-05, "loss": 0.6754, "step": 5780 }, { "epoch": 1.1162386561112183, "grad_norm": 0.7101054191589355, "learning_rate": 4.858475360625945e-05, "loss": 0.6611, "step": 5781 }, { "epoch": 1.1164317435798417, "grad_norm": 0.9478870630264282, "learning_rate": 4.8567910016445416e-05, "loss": 0.6355, "step": 5782 }, { "epoch": 1.116624831048465, "grad_norm": 0.5181518793106079, "learning_rate": 4.855106658928056e-05, "loss": 0.6631, "step": 5783 }, { "epoch": 1.1168179185170883, "grad_norm": 0.47073352336883545, "learning_rate": 4.853422332667787e-05, "loss": 0.5973, "step": 5784 }, { "epoch": 1.1170110059857115, "grad_norm": 0.8893555998802185, "learning_rate": 4.851738023055029e-05, "loss": 0.6439, "step": 5785 }, { "epoch": 1.1172040934543348, "grad_norm": 0.717475414276123, "learning_rate": 4.850053730281081e-05, "loss": 0.5943, "step": 5786 }, { "epoch": 1.1173971809229581, "grad_norm": 1.8601011037826538, "learning_rate": 4.8483694545372316e-05, "loss": 0.6515, "step": 5787 }, { "epoch": 1.1175902683915815, "grad_norm": 0.6360211968421936, "learning_rate": 4.846685196014775e-05, "loss": 0.5669, "step": 5788 }, { "epoch": 1.1177833558602046, "grad_norm": 0.8129772543907166, "learning_rate": 4.845000954904997e-05, "loss": 0.6749, "step": 5789 }, { "epoch": 1.117976443328828, "grad_norm": 0.8245759010314941, "learning_rate": 4.843316731399187e-05, "loss": 0.648, "step": 5790 }, { "epoch": 1.1181695307974513, "grad_norm": 1.5564475059509277, "learning_rate": 4.841632525688632e-05, "loss": 0.6754, "step": 5791 }, { "epoch": 1.1183626182660746, "grad_norm": 0.6305826902389526, "learning_rate": 4.839948337964614e-05, "loss": 0.5976, "step": 5792 }, { "epoch": 1.1185557057346978, "grad_norm": 0.6237434148788452, "learning_rate": 4.8382641684184106e-05, "loss": 0.6364, "step": 5793 }, { "epoch": 1.118748793203321, "grad_norm": 0.7646157145500183, "learning_rate": 4.836580017241303e-05, "loss": 0.5851, "step": 5794 }, { "epoch": 1.1189418806719444, "grad_norm": 0.6847372055053711, "learning_rate": 4.834895884624571e-05, "loss": 0.6438, "step": 5795 }, { "epoch": 1.1191349681405676, "grad_norm": 1.0069894790649414, "learning_rate": 4.833211770759485e-05, "loss": 0.6475, "step": 5796 }, { "epoch": 1.119328055609191, "grad_norm": 0.745712399482727, "learning_rate": 4.8315276758373205e-05, "loss": 0.5439, "step": 5797 }, { "epoch": 1.1195211430778143, "grad_norm": 0.8769484162330627, "learning_rate": 4.8298436000493485e-05, "loss": 0.6121, "step": 5798 }, { "epoch": 1.1197142305464376, "grad_norm": 0.534292995929718, "learning_rate": 4.828159543586836e-05, "loss": 0.5893, "step": 5799 }, { "epoch": 1.119907318015061, "grad_norm": 0.6961458921432495, "learning_rate": 4.826475506641047e-05, "loss": 0.6131, "step": 5800 }, { "epoch": 1.120100405483684, "grad_norm": 1.1746548414230347, "learning_rate": 4.82479148940325e-05, "loss": 0.6209, "step": 5801 }, { "epoch": 1.1202934929523074, "grad_norm": 0.5255800485610962, "learning_rate": 4.823107492064704e-05, "loss": 0.6104, "step": 5802 }, { "epoch": 1.1204865804209307, "grad_norm": 0.6533401012420654, "learning_rate": 4.8214235148166695e-05, "loss": 0.63, "step": 5803 }, { "epoch": 1.1206796678895539, "grad_norm": 0.5788952112197876, "learning_rate": 4.8197395578504014e-05, "loss": 0.6564, "step": 5804 }, { "epoch": 1.1208727553581772, "grad_norm": 0.7365241646766663, "learning_rate": 4.818055621357159e-05, "loss": 0.6037, "step": 5805 }, { "epoch": 1.1210658428268006, "grad_norm": 0.6844108700752258, "learning_rate": 4.81637170552819e-05, "loss": 0.6145, "step": 5806 }, { "epoch": 1.121258930295424, "grad_norm": 0.5724066495895386, "learning_rate": 4.8146878105547466e-05, "loss": 0.5914, "step": 5807 }, { "epoch": 1.121452017764047, "grad_norm": 0.6354872584342957, "learning_rate": 4.8130039366280785e-05, "loss": 0.6307, "step": 5808 }, { "epoch": 1.1216451052326704, "grad_norm": 0.769378662109375, "learning_rate": 4.81132008393943e-05, "loss": 0.6674, "step": 5809 }, { "epoch": 1.1218381927012937, "grad_norm": 0.7700759768486023, "learning_rate": 4.809636252680042e-05, "loss": 0.5926, "step": 5810 }, { "epoch": 1.122031280169917, "grad_norm": 1.403935432434082, "learning_rate": 4.807952443041157e-05, "loss": 0.5977, "step": 5811 }, { "epoch": 1.1222243676385402, "grad_norm": 0.7227696776390076, "learning_rate": 4.806268655214013e-05, "loss": 0.5721, "step": 5812 }, { "epoch": 1.1224174551071635, "grad_norm": 0.6500404477119446, "learning_rate": 4.804584889389846e-05, "loss": 0.6088, "step": 5813 }, { "epoch": 1.1226105425757869, "grad_norm": 0.6207598447799683, "learning_rate": 4.8029011457598884e-05, "loss": 0.6684, "step": 5814 }, { "epoch": 1.1228036300444102, "grad_norm": 0.5923905968666077, "learning_rate": 4.801217424515373e-05, "loss": 0.5976, "step": 5815 }, { "epoch": 1.1229967175130333, "grad_norm": 0.5649651885032654, "learning_rate": 4.799533725847525e-05, "loss": 0.6747, "step": 5816 }, { "epoch": 1.1231898049816567, "grad_norm": 0.5431842803955078, "learning_rate": 4.7978500499475706e-05, "loss": 0.6282, "step": 5817 }, { "epoch": 1.12338289245028, "grad_norm": 3.939065456390381, "learning_rate": 4.7961663970067326e-05, "loss": 0.6226, "step": 5818 }, { "epoch": 1.1235759799189033, "grad_norm": 0.5256083011627197, "learning_rate": 4.7944827672162334e-05, "loss": 0.6334, "step": 5819 }, { "epoch": 1.1237690673875265, "grad_norm": 0.8308218121528625, "learning_rate": 4.7927991607672876e-05, "loss": 0.6588, "step": 5820 }, { "epoch": 1.1239621548561498, "grad_norm": 1.4231537580490112, "learning_rate": 4.791115577851115e-05, "loss": 0.6638, "step": 5821 }, { "epoch": 1.1241552423247732, "grad_norm": 0.8039008378982544, "learning_rate": 4.789432018658923e-05, "loss": 0.6616, "step": 5822 }, { "epoch": 1.1243483297933965, "grad_norm": 1.0191662311553955, "learning_rate": 4.787748483381924e-05, "loss": 0.6427, "step": 5823 }, { "epoch": 1.1245414172620196, "grad_norm": 1.0928986072540283, "learning_rate": 4.7860649722113234e-05, "loss": 0.5516, "step": 5824 }, { "epoch": 1.124734504730643, "grad_norm": 0.6097949743270874, "learning_rate": 4.7843814853383264e-05, "loss": 0.629, "step": 5825 }, { "epoch": 1.1249275921992663, "grad_norm": 0.7122581005096436, "learning_rate": 4.782698022954136e-05, "loss": 0.6267, "step": 5826 }, { "epoch": 1.1251206796678896, "grad_norm": 0.6081376075744629, "learning_rate": 4.7810145852499496e-05, "loss": 0.5982, "step": 5827 }, { "epoch": 1.1253137671365128, "grad_norm": 0.8105718493461609, "learning_rate": 4.7793311724169604e-05, "loss": 0.6524, "step": 5828 }, { "epoch": 1.125506854605136, "grad_norm": 0.7489895224571228, "learning_rate": 4.777647784646365e-05, "loss": 0.6097, "step": 5829 }, { "epoch": 1.1256999420737595, "grad_norm": 0.9845632910728455, "learning_rate": 4.7759644221293534e-05, "loss": 0.6415, "step": 5830 }, { "epoch": 1.1258930295423828, "grad_norm": 0.5915777087211609, "learning_rate": 4.77428108505711e-05, "loss": 0.6235, "step": 5831 }, { "epoch": 1.126086117011006, "grad_norm": 1.4253370761871338, "learning_rate": 4.772597773620825e-05, "loss": 0.621, "step": 5832 }, { "epoch": 1.1262792044796293, "grad_norm": 0.621711015701294, "learning_rate": 4.7709144880116733e-05, "loss": 0.6013, "step": 5833 }, { "epoch": 1.1264722919482526, "grad_norm": 1.6595516204833984, "learning_rate": 4.7692312284208384e-05, "loss": 0.6296, "step": 5834 }, { "epoch": 1.1266653794168757, "grad_norm": 0.693242609500885, "learning_rate": 4.767547995039493e-05, "loss": 0.5783, "step": 5835 }, { "epoch": 1.126858466885499, "grad_norm": 0.8058624267578125, "learning_rate": 4.765864788058811e-05, "loss": 0.6121, "step": 5836 }, { "epoch": 1.1270515543541224, "grad_norm": 0.5746802091598511, "learning_rate": 4.764181607669963e-05, "loss": 0.5662, "step": 5837 }, { "epoch": 1.1272446418227458, "grad_norm": 1.1434988975524902, "learning_rate": 4.762498454064116e-05, "loss": 0.6214, "step": 5838 }, { "epoch": 1.127437729291369, "grad_norm": 1.3682150840759277, "learning_rate": 4.7608153274324306e-05, "loss": 0.5955, "step": 5839 }, { "epoch": 1.1276308167599922, "grad_norm": 0.5980880260467529, "learning_rate": 4.7591322279660694e-05, "loss": 0.6302, "step": 5840 }, { "epoch": 1.1278239042286156, "grad_norm": 1.327847957611084, "learning_rate": 4.75744915585619e-05, "loss": 0.5675, "step": 5841 }, { "epoch": 1.128016991697239, "grad_norm": 0.5604963898658752, "learning_rate": 4.7557661112939464e-05, "loss": 0.6969, "step": 5842 }, { "epoch": 1.128210079165862, "grad_norm": 0.6697302460670471, "learning_rate": 4.75408309447049e-05, "loss": 0.6087, "step": 5843 }, { "epoch": 1.1284031666344854, "grad_norm": 0.7881593704223633, "learning_rate": 4.7524001055769697e-05, "loss": 0.5904, "step": 5844 }, { "epoch": 1.1285962541031087, "grad_norm": 0.6107775568962097, "learning_rate": 4.750717144804529e-05, "loss": 0.6055, "step": 5845 }, { "epoch": 1.128789341571732, "grad_norm": 1.3687703609466553, "learning_rate": 4.74903421234431e-05, "loss": 0.6358, "step": 5846 }, { "epoch": 1.1289824290403554, "grad_norm": 1.0453885793685913, "learning_rate": 4.747351308387452e-05, "loss": 0.5653, "step": 5847 }, { "epoch": 1.1291755165089785, "grad_norm": 0.9930850863456726, "learning_rate": 4.745668433125089e-05, "loss": 0.6546, "step": 5848 }, { "epoch": 1.1293686039776019, "grad_norm": 0.6765487194061279, "learning_rate": 4.743985586748355e-05, "loss": 0.5632, "step": 5849 }, { "epoch": 1.1295616914462252, "grad_norm": 0.9576702117919922, "learning_rate": 4.742302769448377e-05, "loss": 0.6495, "step": 5850 }, { "epoch": 1.1297547789148483, "grad_norm": 1.8033950328826904, "learning_rate": 4.740619981416281e-05, "loss": 0.5151, "step": 5851 }, { "epoch": 1.1299478663834717, "grad_norm": 1.3327668905258179, "learning_rate": 4.738937222843188e-05, "loss": 0.6002, "step": 5852 }, { "epoch": 1.130140953852095, "grad_norm": 0.7554148435592651, "learning_rate": 4.737254493920218e-05, "loss": 0.5851, "step": 5853 }, { "epoch": 1.1303340413207184, "grad_norm": 0.5882022976875305, "learning_rate": 4.7355717948384876e-05, "loss": 0.5508, "step": 5854 }, { "epoch": 1.1305271287893417, "grad_norm": 0.8296491503715515, "learning_rate": 4.7338891257891084e-05, "loss": 0.5615, "step": 5855 }, { "epoch": 1.1307202162579648, "grad_norm": 0.5380356311798096, "learning_rate": 4.732206486963186e-05, "loss": 0.6021, "step": 5856 }, { "epoch": 1.1309133037265882, "grad_norm": 0.7401276230812073, "learning_rate": 4.730523878551828e-05, "loss": 0.6589, "step": 5857 }, { "epoch": 1.1311063911952115, "grad_norm": 1.0875993967056274, "learning_rate": 4.7288413007461366e-05, "loss": 0.6821, "step": 5858 }, { "epoch": 1.1312994786638346, "grad_norm": 0.6449376344680786, "learning_rate": 4.727158753737208e-05, "loss": 0.6446, "step": 5859 }, { "epoch": 1.131492566132458, "grad_norm": 0.7974050045013428, "learning_rate": 4.7254762377161386e-05, "loss": 0.613, "step": 5860 }, { "epoch": 1.1316856536010813, "grad_norm": 2.172320604324341, "learning_rate": 4.723793752874022e-05, "loss": 0.6221, "step": 5861 }, { "epoch": 1.1318787410697047, "grad_norm": 1.0843886137008667, "learning_rate": 4.7221112994019415e-05, "loss": 0.643, "step": 5862 }, { "epoch": 1.1320718285383278, "grad_norm": 0.9553198218345642, "learning_rate": 4.720428877490981e-05, "loss": 0.5857, "step": 5863 }, { "epoch": 1.1322649160069511, "grad_norm": 0.6507105827331543, "learning_rate": 4.718746487332224e-05, "loss": 0.6089, "step": 5864 }, { "epoch": 1.1324580034755745, "grad_norm": 0.7599602341651917, "learning_rate": 4.7170641291167465e-05, "loss": 0.5478, "step": 5865 }, { "epoch": 1.1326510909441978, "grad_norm": 0.5928899645805359, "learning_rate": 4.7153818030356236e-05, "loss": 0.5776, "step": 5866 }, { "epoch": 1.132844178412821, "grad_norm": 0.8468688726425171, "learning_rate": 4.71369950927992e-05, "loss": 0.6369, "step": 5867 }, { "epoch": 1.1330372658814443, "grad_norm": 0.6613619923591614, "learning_rate": 4.7120172480407055e-05, "loss": 0.564, "step": 5868 }, { "epoch": 1.1332303533500676, "grad_norm": 0.7081765532493591, "learning_rate": 4.710335019509042e-05, "loss": 0.6331, "step": 5869 }, { "epoch": 1.133423440818691, "grad_norm": 0.628680408000946, "learning_rate": 4.708652823875987e-05, "loss": 0.5656, "step": 5870 }, { "epoch": 1.133616528287314, "grad_norm": 3.0843658447265625, "learning_rate": 4.7069706613325956e-05, "loss": 0.6343, "step": 5871 }, { "epoch": 1.1338096157559374, "grad_norm": 0.7125868797302246, "learning_rate": 4.705288532069922e-05, "loss": 0.6826, "step": 5872 }, { "epoch": 1.1340027032245608, "grad_norm": 0.5448782444000244, "learning_rate": 4.703606436279009e-05, "loss": 0.6154, "step": 5873 }, { "epoch": 1.134195790693184, "grad_norm": 0.6655316352844238, "learning_rate": 4.701924374150901e-05, "loss": 0.6729, "step": 5874 }, { "epoch": 1.1343888781618072, "grad_norm": 0.5807668566703796, "learning_rate": 4.7002423458766385e-05, "loss": 0.6048, "step": 5875 }, { "epoch": 1.1345819656304306, "grad_norm": 0.8714079260826111, "learning_rate": 4.6985603516472594e-05, "loss": 0.5924, "step": 5876 }, { "epoch": 1.134775053099054, "grad_norm": 0.7614898085594177, "learning_rate": 4.696878391653792e-05, "loss": 0.6785, "step": 5877 }, { "epoch": 1.1349681405676773, "grad_norm": 0.5581685900688171, "learning_rate": 4.695196466087268e-05, "loss": 0.5976, "step": 5878 }, { "epoch": 1.1351612280363004, "grad_norm": 0.643054187297821, "learning_rate": 4.693514575138707e-05, "loss": 0.5843, "step": 5879 }, { "epoch": 1.1353543155049237, "grad_norm": 0.6932922601699829, "learning_rate": 4.6918327189991335e-05, "loss": 0.5378, "step": 5880 }, { "epoch": 1.135547402973547, "grad_norm": 0.5499346852302551, "learning_rate": 4.690150897859561e-05, "loss": 0.5591, "step": 5881 }, { "epoch": 1.1357404904421702, "grad_norm": 0.4606683850288391, "learning_rate": 4.688469111911003e-05, "loss": 0.5952, "step": 5882 }, { "epoch": 1.1359335779107935, "grad_norm": 0.5778303146362305, "learning_rate": 4.6867873613444684e-05, "loss": 0.5723, "step": 5883 }, { "epoch": 1.1361266653794169, "grad_norm": 1.2032495737075806, "learning_rate": 4.6851056463509625e-05, "loss": 0.6219, "step": 5884 }, { "epoch": 1.1363197528480402, "grad_norm": 5.386508464813232, "learning_rate": 4.6834239671214806e-05, "loss": 0.5635, "step": 5885 }, { "epoch": 1.1365128403166636, "grad_norm": 0.8808706998825073, "learning_rate": 4.681742323847024e-05, "loss": 0.6152, "step": 5886 }, { "epoch": 1.1367059277852867, "grad_norm": 1.0419195890426636, "learning_rate": 4.6800607167185825e-05, "loss": 0.6331, "step": 5887 }, { "epoch": 1.13689901525391, "grad_norm": 0.569983184337616, "learning_rate": 4.678379145927144e-05, "loss": 0.6099, "step": 5888 }, { "epoch": 1.1370921027225334, "grad_norm": 0.9323469400405884, "learning_rate": 4.6766976116636954e-05, "loss": 0.5978, "step": 5889 }, { "epoch": 1.1372851901911565, "grad_norm": 0.5707986354827881, "learning_rate": 4.675016114119211e-05, "loss": 0.6768, "step": 5890 }, { "epoch": 1.1374782776597798, "grad_norm": 0.5674338340759277, "learning_rate": 4.673334653484671e-05, "loss": 0.7156, "step": 5891 }, { "epoch": 1.1376713651284032, "grad_norm": 0.9244838953018188, "learning_rate": 4.6716532299510443e-05, "loss": 0.6009, "step": 5892 }, { "epoch": 1.1378644525970265, "grad_norm": 0.8405105471611023, "learning_rate": 4.6699718437093e-05, "loss": 0.6247, "step": 5893 }, { "epoch": 1.1380575400656499, "grad_norm": 0.5662208795547485, "learning_rate": 4.668290494950398e-05, "loss": 0.5867, "step": 5894 }, { "epoch": 1.138250627534273, "grad_norm": 0.9779578447341919, "learning_rate": 4.666609183865301e-05, "loss": 0.6072, "step": 5895 }, { "epoch": 1.1384437150028963, "grad_norm": 0.6703704595565796, "learning_rate": 4.66492791064496e-05, "loss": 0.6112, "step": 5896 }, { "epoch": 1.1386368024715197, "grad_norm": 0.6384516954421997, "learning_rate": 4.663246675480327e-05, "loss": 0.6793, "step": 5897 }, { "epoch": 1.1388298899401428, "grad_norm": 0.702673614025116, "learning_rate": 4.6615654785623454e-05, "loss": 0.6819, "step": 5898 }, { "epoch": 1.1390229774087661, "grad_norm": 0.4870661795139313, "learning_rate": 4.659884320081959e-05, "loss": 0.6243, "step": 5899 }, { "epoch": 1.1392160648773895, "grad_norm": 0.9434278011322021, "learning_rate": 4.658203200230104e-05, "loss": 0.6125, "step": 5900 }, { "epoch": 1.1394091523460128, "grad_norm": 0.6308638453483582, "learning_rate": 4.656522119197715e-05, "loss": 0.6166, "step": 5901 }, { "epoch": 1.1396022398146362, "grad_norm": 0.7622399926185608, "learning_rate": 4.654841077175716e-05, "loss": 0.6348, "step": 5902 }, { "epoch": 1.1397953272832593, "grad_norm": 0.881397545337677, "learning_rate": 4.6531600743550336e-05, "loss": 0.6245, "step": 5903 }, { "epoch": 1.1399884147518826, "grad_norm": 0.647769570350647, "learning_rate": 4.6514791109265874e-05, "loss": 0.6511, "step": 5904 }, { "epoch": 1.140181502220506, "grad_norm": 0.7796275615692139, "learning_rate": 4.649798187081291e-05, "loss": 0.6993, "step": 5905 }, { "epoch": 1.140374589689129, "grad_norm": 0.8390392065048218, "learning_rate": 4.648117303010055e-05, "loss": 0.6453, "step": 5906 }, { "epoch": 1.1405676771577524, "grad_norm": 0.7082614302635193, "learning_rate": 4.646436458903789e-05, "loss": 0.642, "step": 5907 }, { "epoch": 1.1407607646263758, "grad_norm": 0.7064456939697266, "learning_rate": 4.644755654953389e-05, "loss": 0.6083, "step": 5908 }, { "epoch": 1.140953852094999, "grad_norm": 1.276796579360962, "learning_rate": 4.643074891349753e-05, "loss": 0.5676, "step": 5909 }, { "epoch": 1.1411469395636222, "grad_norm": 0.6446213126182556, "learning_rate": 4.641394168283774e-05, "loss": 0.5874, "step": 5910 }, { "epoch": 1.1413400270322456, "grad_norm": 0.7330381274223328, "learning_rate": 4.639713485946341e-05, "loss": 0.5906, "step": 5911 }, { "epoch": 1.141533114500869, "grad_norm": 0.5471646189689636, "learning_rate": 4.638032844528337e-05, "loss": 0.6053, "step": 5912 }, { "epoch": 1.1417262019694923, "grad_norm": 0.7808860540390015, "learning_rate": 4.636352244220636e-05, "loss": 0.5864, "step": 5913 }, { "epoch": 1.1419192894381154, "grad_norm": 0.6030479669570923, "learning_rate": 4.634671685214115e-05, "loss": 0.6098, "step": 5914 }, { "epoch": 1.1421123769067387, "grad_norm": 0.8161371350288391, "learning_rate": 4.632991167699644e-05, "loss": 0.622, "step": 5915 }, { "epoch": 1.142305464375362, "grad_norm": 0.504561185836792, "learning_rate": 4.631310691868084e-05, "loss": 0.6058, "step": 5916 }, { "epoch": 1.1424985518439854, "grad_norm": 0.8610186576843262, "learning_rate": 4.6296302579102966e-05, "loss": 0.6716, "step": 5917 }, { "epoch": 1.1426916393126085, "grad_norm": 0.7484865188598633, "learning_rate": 4.627949866017139e-05, "loss": 0.6817, "step": 5918 }, { "epoch": 1.1428847267812319, "grad_norm": 0.5155802369117737, "learning_rate": 4.626269516379457e-05, "loss": 0.6362, "step": 5919 }, { "epoch": 1.1430778142498552, "grad_norm": 0.8232124447822571, "learning_rate": 4.624589209188096e-05, "loss": 0.5957, "step": 5920 }, { "epoch": 1.1432709017184786, "grad_norm": 0.4965214133262634, "learning_rate": 4.622908944633898e-05, "loss": 0.6463, "step": 5921 }, { "epoch": 1.1434639891871017, "grad_norm": 0.5384321808815002, "learning_rate": 4.621228722907699e-05, "loss": 0.6404, "step": 5922 }, { "epoch": 1.143657076655725, "grad_norm": 1.3592463731765747, "learning_rate": 4.619548544200328e-05, "loss": 0.5841, "step": 5923 }, { "epoch": 1.1438501641243484, "grad_norm": 2.003957986831665, "learning_rate": 4.617868408702613e-05, "loss": 0.6567, "step": 5924 }, { "epoch": 1.1440432515929717, "grad_norm": 0.41100963950157166, "learning_rate": 4.616188316605372e-05, "loss": 0.6109, "step": 5925 }, { "epoch": 1.1442363390615948, "grad_norm": 0.5544785857200623, "learning_rate": 4.6145082680994226e-05, "loss": 0.6455, "step": 5926 }, { "epoch": 1.1444294265302182, "grad_norm": 0.6641645431518555, "learning_rate": 4.612828263375575e-05, "loss": 0.6161, "step": 5927 }, { "epoch": 1.1446225139988415, "grad_norm": 3.024808883666992, "learning_rate": 4.6111483026246374e-05, "loss": 0.5983, "step": 5928 }, { "epoch": 1.1448156014674646, "grad_norm": 0.6911718249320984, "learning_rate": 4.609468386037408e-05, "loss": 0.6065, "step": 5929 }, { "epoch": 1.145008688936088, "grad_norm": 0.5435710549354553, "learning_rate": 4.607788513804684e-05, "loss": 0.5965, "step": 5930 }, { "epoch": 1.1452017764047113, "grad_norm": 0.5103104114532471, "learning_rate": 4.606108686117256e-05, "loss": 0.6368, "step": 5931 }, { "epoch": 1.1453948638733347, "grad_norm": 0.5192880630493164, "learning_rate": 4.6044289031659116e-05, "loss": 0.6085, "step": 5932 }, { "epoch": 1.145587951341958, "grad_norm": 0.5603697299957275, "learning_rate": 4.602749165141428e-05, "loss": 0.6053, "step": 5933 }, { "epoch": 1.1457810388105811, "grad_norm": 0.5340630412101746, "learning_rate": 4.601069472234584e-05, "loss": 0.594, "step": 5934 }, { "epoch": 1.1459741262792045, "grad_norm": 0.7726277709007263, "learning_rate": 4.599389824636152e-05, "loss": 0.6609, "step": 5935 }, { "epoch": 1.1461672137478278, "grad_norm": 1.111629605293274, "learning_rate": 4.597710222536892e-05, "loss": 0.5799, "step": 5936 }, { "epoch": 1.146360301216451, "grad_norm": 1.168583869934082, "learning_rate": 4.596030666127568e-05, "loss": 0.6299, "step": 5937 }, { "epoch": 1.1465533886850743, "grad_norm": 0.6957864165306091, "learning_rate": 4.5943511555989325e-05, "loss": 0.6034, "step": 5938 }, { "epoch": 1.1467464761536976, "grad_norm": 7.47896671295166, "learning_rate": 4.592671691141739e-05, "loss": 0.6447, "step": 5939 }, { "epoch": 1.146939563622321, "grad_norm": 1.2578976154327393, "learning_rate": 4.5909922729467294e-05, "loss": 0.6983, "step": 5940 }, { "epoch": 1.1471326510909443, "grad_norm": 0.6869463920593262, "learning_rate": 4.589312901204646e-05, "loss": 0.5784, "step": 5941 }, { "epoch": 1.1473257385595674, "grad_norm": 0.4966239631175995, "learning_rate": 4.5876335761062175e-05, "loss": 0.5877, "step": 5942 }, { "epoch": 1.1475188260281908, "grad_norm": 0.7419885396957397, "learning_rate": 4.5859542978421776e-05, "loss": 0.5982, "step": 5943 }, { "epoch": 1.1477119134968141, "grad_norm": 0.8127679824829102, "learning_rate": 4.584275066603247e-05, "loss": 0.6293, "step": 5944 }, { "epoch": 1.1479050009654372, "grad_norm": 0.8188669681549072, "learning_rate": 4.582595882580146e-05, "loss": 0.6241, "step": 5945 }, { "epoch": 1.1480980884340606, "grad_norm": 1.055179476737976, "learning_rate": 4.5809167459635874e-05, "loss": 0.5532, "step": 5946 }, { "epoch": 1.148291175902684, "grad_norm": 0.576023280620575, "learning_rate": 4.579237656944278e-05, "loss": 0.5581, "step": 5947 }, { "epoch": 1.1484842633713073, "grad_norm": 1.1558855772018433, "learning_rate": 4.577558615712918e-05, "loss": 0.6319, "step": 5948 }, { "epoch": 1.1486773508399306, "grad_norm": 0.6732537746429443, "learning_rate": 4.575879622460205e-05, "loss": 0.672, "step": 5949 }, { "epoch": 1.1488704383085537, "grad_norm": 0.6040414571762085, "learning_rate": 4.5742006773768324e-05, "loss": 0.5676, "step": 5950 }, { "epoch": 1.149063525777177, "grad_norm": 0.5466001629829407, "learning_rate": 4.5725217806534825e-05, "loss": 0.6084, "step": 5951 }, { "epoch": 1.1492566132458004, "grad_norm": 0.6777337789535522, "learning_rate": 4.5708429324808394e-05, "loss": 0.6239, "step": 5952 }, { "epoch": 1.1494497007144235, "grad_norm": 0.506739616394043, "learning_rate": 4.5691641330495736e-05, "loss": 0.5688, "step": 5953 }, { "epoch": 1.1496427881830469, "grad_norm": 0.6324562430381775, "learning_rate": 4.567485382550356e-05, "loss": 0.612, "step": 5954 }, { "epoch": 1.1498358756516702, "grad_norm": 0.6981696486473083, "learning_rate": 4.56580668117385e-05, "loss": 0.6, "step": 5955 }, { "epoch": 1.1500289631202936, "grad_norm": 0.6279197335243225, "learning_rate": 4.5641280291107136e-05, "loss": 0.6201, "step": 5956 }, { "epoch": 1.1502220505889167, "grad_norm": 0.7623358964920044, "learning_rate": 4.5624494265516e-05, "loss": 0.616, "step": 5957 }, { "epoch": 1.15041513805754, "grad_norm": 0.5359501838684082, "learning_rate": 4.560770873687157e-05, "loss": 0.633, "step": 5958 }, { "epoch": 1.1506082255261634, "grad_norm": 0.4581087529659271, "learning_rate": 4.559092370708022e-05, "loss": 0.6097, "step": 5959 }, { "epoch": 1.1508013129947867, "grad_norm": 0.5853796005249023, "learning_rate": 4.557413917804832e-05, "loss": 0.587, "step": 5960 }, { "epoch": 1.1509944004634098, "grad_norm": 0.4829557240009308, "learning_rate": 4.555735515168218e-05, "loss": 0.621, "step": 5961 }, { "epoch": 1.1511874879320332, "grad_norm": 0.5764696598052979, "learning_rate": 4.554057162988803e-05, "loss": 0.6898, "step": 5962 }, { "epoch": 1.1513805754006565, "grad_norm": 0.7001504302024841, "learning_rate": 4.552378861457205e-05, "loss": 0.5662, "step": 5963 }, { "epoch": 1.1515736628692799, "grad_norm": 0.5813930630683899, "learning_rate": 4.55070061076404e-05, "loss": 0.656, "step": 5964 }, { "epoch": 1.151766750337903, "grad_norm": 0.4869421124458313, "learning_rate": 4.54902241109991e-05, "loss": 0.5843, "step": 5965 }, { "epoch": 1.1519598378065263, "grad_norm": 0.9783656001091003, "learning_rate": 4.547344262655417e-05, "loss": 0.556, "step": 5966 }, { "epoch": 1.1521529252751497, "grad_norm": 1.5268480777740479, "learning_rate": 4.5456661656211584e-05, "loss": 0.6047, "step": 5967 }, { "epoch": 1.152346012743773, "grad_norm": 0.688255786895752, "learning_rate": 4.5439881201877225e-05, "loss": 0.545, "step": 5968 }, { "epoch": 1.1525391002123961, "grad_norm": 0.5578019618988037, "learning_rate": 4.542310126545693e-05, "loss": 0.6011, "step": 5969 }, { "epoch": 1.1527321876810195, "grad_norm": 0.5577206015586853, "learning_rate": 4.540632184885646e-05, "loss": 0.6538, "step": 5970 }, { "epoch": 1.1529252751496428, "grad_norm": 0.8603333234786987, "learning_rate": 4.538954295398154e-05, "loss": 0.5722, "step": 5971 }, { "epoch": 1.1531183626182662, "grad_norm": 2.2803966999053955, "learning_rate": 4.537276458273784e-05, "loss": 0.5761, "step": 5972 }, { "epoch": 1.1533114500868893, "grad_norm": 0.6403817534446716, "learning_rate": 4.535598673703094e-05, "loss": 0.6111, "step": 5973 }, { "epoch": 1.1535045375555126, "grad_norm": 0.6893260478973389, "learning_rate": 4.533920941876639e-05, "loss": 0.5856, "step": 5974 }, { "epoch": 1.153697625024136, "grad_norm": 0.7004157900810242, "learning_rate": 4.5322432629849674e-05, "loss": 0.5839, "step": 5975 }, { "epoch": 1.153890712492759, "grad_norm": 0.6555822491645813, "learning_rate": 4.53056563721862e-05, "loss": 0.5757, "step": 5976 }, { "epoch": 1.1540837999613824, "grad_norm": 0.7414466142654419, "learning_rate": 4.5288880647681315e-05, "loss": 0.6349, "step": 5977 }, { "epoch": 1.1542768874300058, "grad_norm": 0.9102056622505188, "learning_rate": 4.527210545824034e-05, "loss": 0.5871, "step": 5978 }, { "epoch": 1.1544699748986291, "grad_norm": 0.5085495710372925, "learning_rate": 4.5255330805768506e-05, "loss": 0.6444, "step": 5979 }, { "epoch": 1.1546630623672525, "grad_norm": 1.9213508367538452, "learning_rate": 4.523855669217097e-05, "loss": 0.5774, "step": 5980 }, { "epoch": 1.1548561498358756, "grad_norm": 0.7380638718605042, "learning_rate": 4.5221783119352887e-05, "loss": 0.6008, "step": 5981 }, { "epoch": 1.155049237304499, "grad_norm": 0.6574327945709229, "learning_rate": 4.5205010089219274e-05, "loss": 0.6102, "step": 5982 }, { "epoch": 1.1552423247731223, "grad_norm": 0.9282007217407227, "learning_rate": 4.518823760367512e-05, "loss": 0.5964, "step": 5983 }, { "epoch": 1.1554354122417454, "grad_norm": 0.6793168783187866, "learning_rate": 4.5171465664625375e-05, "loss": 0.6116, "step": 5984 }, { "epoch": 1.1556284997103687, "grad_norm": 0.7367089986801147, "learning_rate": 4.515469427397491e-05, "loss": 0.5615, "step": 5985 }, { "epoch": 1.155821587178992, "grad_norm": 0.6620907187461853, "learning_rate": 4.5137923433628505e-05, "loss": 0.6516, "step": 5986 }, { "epoch": 1.1560146746476154, "grad_norm": 0.7347174286842346, "learning_rate": 4.512115314549094e-05, "loss": 0.5253, "step": 5987 }, { "epoch": 1.1562077621162388, "grad_norm": 0.8450995087623596, "learning_rate": 4.5104383411466855e-05, "loss": 0.6039, "step": 5988 }, { "epoch": 1.156400849584862, "grad_norm": 0.8839191198348999, "learning_rate": 4.50876142334609e-05, "loss": 0.6488, "step": 5989 }, { "epoch": 1.1565939370534852, "grad_norm": 0.7169908285140991, "learning_rate": 4.50708456133776e-05, "loss": 0.6016, "step": 5990 }, { "epoch": 1.1567870245221086, "grad_norm": 1.2411463260650635, "learning_rate": 4.505407755312146e-05, "loss": 0.5916, "step": 5991 }, { "epoch": 1.1569801119907317, "grad_norm": 0.9634149670600891, "learning_rate": 4.503731005459693e-05, "loss": 0.622, "step": 5992 }, { "epoch": 1.157173199459355, "grad_norm": 0.5984808802604675, "learning_rate": 4.502054311970834e-05, "loss": 0.605, "step": 5993 }, { "epoch": 1.1573662869279784, "grad_norm": 2.3696177005767822, "learning_rate": 4.500377675035999e-05, "loss": 0.61, "step": 5994 }, { "epoch": 1.1575593743966017, "grad_norm": 0.8416692614555359, "learning_rate": 4.498701094845613e-05, "loss": 0.6298, "step": 5995 }, { "epoch": 1.157752461865225, "grad_norm": 0.9370647668838501, "learning_rate": 4.497024571590093e-05, "loss": 0.6173, "step": 5996 }, { "epoch": 1.1579455493338482, "grad_norm": 0.5732086896896362, "learning_rate": 4.495348105459849e-05, "loss": 0.5788, "step": 5997 }, { "epoch": 1.1581386368024715, "grad_norm": 2.1599090099334717, "learning_rate": 4.493671696645287e-05, "loss": 0.6041, "step": 5998 }, { "epoch": 1.1583317242710949, "grad_norm": 0.827339231967926, "learning_rate": 4.491995345336801e-05, "loss": 0.6165, "step": 5999 }, { "epoch": 1.158524811739718, "grad_norm": 0.6057966947555542, "learning_rate": 4.490319051724785e-05, "loss": 0.5904, "step": 6000 }, { "epoch": 1.158524811739718, "eval_loss": 0.6683863401412964, "eval_runtime": 63.5608, "eval_samples_per_second": 10.447, "eval_steps_per_second": 0.33, "step": 6000 }, { "epoch": 1.1587178992083413, "grad_norm": 0.7834309339523315, "learning_rate": 4.488642815999622e-05, "loss": 0.671, "step": 6001 }, { "epoch": 1.1589109866769647, "grad_norm": 0.7787137031555176, "learning_rate": 4.48696663835169e-05, "loss": 0.5978, "step": 6002 }, { "epoch": 1.159104074145588, "grad_norm": 0.6320275068283081, "learning_rate": 4.485290518971362e-05, "loss": 0.5974, "step": 6003 }, { "epoch": 1.1592971616142111, "grad_norm": 0.6054998636245728, "learning_rate": 4.4836144580490025e-05, "loss": 0.6272, "step": 6004 }, { "epoch": 1.1594902490828345, "grad_norm": 0.6034048795700073, "learning_rate": 4.4819384557749664e-05, "loss": 0.6005, "step": 6005 }, { "epoch": 1.1596833365514578, "grad_norm": 0.669922411441803, "learning_rate": 4.4802625123396084e-05, "loss": 0.5677, "step": 6006 }, { "epoch": 1.1598764240200812, "grad_norm": 0.9613328576087952, "learning_rate": 4.4785866279332724e-05, "loss": 0.6239, "step": 6007 }, { "epoch": 1.1600695114887043, "grad_norm": 0.7506197094917297, "learning_rate": 4.4769108027462956e-05, "loss": 0.5527, "step": 6008 }, { "epoch": 1.1602625989573276, "grad_norm": 0.6869733929634094, "learning_rate": 4.475235036969011e-05, "loss": 0.5936, "step": 6009 }, { "epoch": 1.160455686425951, "grad_norm": 0.7849960327148438, "learning_rate": 4.473559330791742e-05, "loss": 0.6101, "step": 6010 }, { "epoch": 1.1606487738945743, "grad_norm": 0.614124596118927, "learning_rate": 4.471883684404807e-05, "loss": 0.6664, "step": 6011 }, { "epoch": 1.1608418613631974, "grad_norm": 1.1143778562545776, "learning_rate": 4.4702080979985155e-05, "loss": 0.5763, "step": 6012 }, { "epoch": 1.1610349488318208, "grad_norm": 0.6022180318832397, "learning_rate": 4.4685325717631736e-05, "loss": 0.559, "step": 6013 }, { "epoch": 1.1612280363004441, "grad_norm": 0.6372161507606506, "learning_rate": 4.4668571058890774e-05, "loss": 0.6271, "step": 6014 }, { "epoch": 1.1614211237690675, "grad_norm": 0.7567427158355713, "learning_rate": 4.465181700566519e-05, "loss": 0.6505, "step": 6015 }, { "epoch": 1.1616142112376906, "grad_norm": 0.8725204467773438, "learning_rate": 4.46350635598578e-05, "loss": 0.6092, "step": 6016 }, { "epoch": 1.161807298706314, "grad_norm": 0.7416903376579285, "learning_rate": 4.461831072337138e-05, "loss": 0.5786, "step": 6017 }, { "epoch": 1.1620003861749373, "grad_norm": 3.0084991455078125, "learning_rate": 4.460155849810862e-05, "loss": 0.5322, "step": 6018 }, { "epoch": 1.1621934736435606, "grad_norm": 0.8133116364479065, "learning_rate": 4.458480688597217e-05, "loss": 0.6159, "step": 6019 }, { "epoch": 1.1623865611121837, "grad_norm": 0.8569480776786804, "learning_rate": 4.456805588886457e-05, "loss": 0.568, "step": 6020 }, { "epoch": 1.162579648580807, "grad_norm": 0.836497962474823, "learning_rate": 4.455130550868834e-05, "loss": 0.6619, "step": 6021 }, { "epoch": 1.1627727360494304, "grad_norm": 1.123559594154358, "learning_rate": 4.4534555747345845e-05, "loss": 0.6526, "step": 6022 }, { "epoch": 1.1629658235180536, "grad_norm": 0.7607426047325134, "learning_rate": 4.451780660673947e-05, "loss": 0.6741, "step": 6023 }, { "epoch": 1.163158910986677, "grad_norm": 0.7623118758201599, "learning_rate": 4.45010580887715e-05, "loss": 0.6004, "step": 6024 }, { "epoch": 1.1633519984553002, "grad_norm": 1.0318646430969238, "learning_rate": 4.448431019534411e-05, "loss": 0.6873, "step": 6025 }, { "epoch": 1.1635450859239236, "grad_norm": 0.8380336165428162, "learning_rate": 4.446756292835946e-05, "loss": 0.553, "step": 6026 }, { "epoch": 1.163738173392547, "grad_norm": 0.5455314517021179, "learning_rate": 4.445081628971963e-05, "loss": 0.5639, "step": 6027 }, { "epoch": 1.16393126086117, "grad_norm": 1.3220306634902954, "learning_rate": 4.44340702813266e-05, "loss": 0.6277, "step": 6028 }, { "epoch": 1.1641243483297934, "grad_norm": 0.6024133563041687, "learning_rate": 4.441732490508226e-05, "loss": 0.5523, "step": 6029 }, { "epoch": 1.1643174357984167, "grad_norm": 0.588817834854126, "learning_rate": 4.44005801628885e-05, "loss": 0.658, "step": 6030 }, { "epoch": 1.1645105232670399, "grad_norm": 1.6863006353378296, "learning_rate": 4.43838360566471e-05, "loss": 0.6298, "step": 6031 }, { "epoch": 1.1647036107356632, "grad_norm": 0.6117079854011536, "learning_rate": 4.436709258825976e-05, "loss": 0.5919, "step": 6032 }, { "epoch": 1.1648966982042865, "grad_norm": 0.7122900485992432, "learning_rate": 4.4350349759628094e-05, "loss": 0.5455, "step": 6033 }, { "epoch": 1.1650897856729099, "grad_norm": 0.8191114664077759, "learning_rate": 4.433360757265367e-05, "loss": 0.6678, "step": 6034 }, { "epoch": 1.1652828731415332, "grad_norm": 1.1766020059585571, "learning_rate": 4.431686602923801e-05, "loss": 0.6504, "step": 6035 }, { "epoch": 1.1654759606101563, "grad_norm": 0.7020783424377441, "learning_rate": 4.430012513128249e-05, "loss": 0.6355, "step": 6036 }, { "epoch": 1.1656690480787797, "grad_norm": 1.0897523164749146, "learning_rate": 4.428338488068846e-05, "loss": 0.6031, "step": 6037 }, { "epoch": 1.165862135547403, "grad_norm": 0.7179263234138489, "learning_rate": 4.426664527935722e-05, "loss": 0.6091, "step": 6038 }, { "epoch": 1.1660552230160262, "grad_norm": 0.8571575284004211, "learning_rate": 4.424990632918994e-05, "loss": 0.6263, "step": 6039 }, { "epoch": 1.1662483104846495, "grad_norm": 1.2860121726989746, "learning_rate": 4.4233168032087717e-05, "loss": 0.5624, "step": 6040 }, { "epoch": 1.1664413979532728, "grad_norm": 0.634240448474884, "learning_rate": 4.4216430389951635e-05, "loss": 0.6323, "step": 6041 }, { "epoch": 1.1666344854218962, "grad_norm": 0.5990033745765686, "learning_rate": 4.419969340468266e-05, "loss": 0.6079, "step": 6042 }, { "epoch": 1.1668275728905195, "grad_norm": 0.7266750335693359, "learning_rate": 4.418295707818167e-05, "loss": 0.6346, "step": 6043 }, { "epoch": 1.1670206603591426, "grad_norm": 0.7909740209579468, "learning_rate": 4.416622141234953e-05, "loss": 0.6188, "step": 6044 }, { "epoch": 1.167213747827766, "grad_norm": 0.5364885330200195, "learning_rate": 4.414948640908694e-05, "loss": 0.5955, "step": 6045 }, { "epoch": 1.1674068352963893, "grad_norm": 0.7607220411300659, "learning_rate": 4.4132752070294594e-05, "loss": 0.6236, "step": 6046 }, { "epoch": 1.1675999227650125, "grad_norm": 0.591974139213562, "learning_rate": 4.4116018397873085e-05, "loss": 0.6102, "step": 6047 }, { "epoch": 1.1677930102336358, "grad_norm": 0.8144918084144592, "learning_rate": 4.4099285393722935e-05, "loss": 0.6134, "step": 6048 }, { "epoch": 1.1679860977022591, "grad_norm": 1.0608625411987305, "learning_rate": 4.4082553059744603e-05, "loss": 0.6192, "step": 6049 }, { "epoch": 1.1681791851708825, "grad_norm": 0.7129474878311157, "learning_rate": 4.4065821397838465e-05, "loss": 0.6388, "step": 6050 }, { "epoch": 1.1683722726395056, "grad_norm": 0.8150777816772461, "learning_rate": 4.404909040990477e-05, "loss": 0.5924, "step": 6051 }, { "epoch": 1.168565360108129, "grad_norm": 1.0828362703323364, "learning_rate": 4.4032360097843763e-05, "loss": 0.5872, "step": 6052 }, { "epoch": 1.1687584475767523, "grad_norm": 0.798271119594574, "learning_rate": 4.4015630463555594e-05, "loss": 0.6288, "step": 6053 }, { "epoch": 1.1689515350453756, "grad_norm": 0.874203085899353, "learning_rate": 4.39989015089403e-05, "loss": 0.6804, "step": 6054 }, { "epoch": 1.1691446225139988, "grad_norm": 1.1568914651870728, "learning_rate": 4.398217323589791e-05, "loss": 0.6166, "step": 6055 }, { "epoch": 1.169337709982622, "grad_norm": 1.3123775720596313, "learning_rate": 4.396544564632828e-05, "loss": 0.6208, "step": 6056 }, { "epoch": 1.1695307974512454, "grad_norm": 0.7786245942115784, "learning_rate": 4.3948718742131275e-05, "loss": 0.5834, "step": 6057 }, { "epoch": 1.1697238849198688, "grad_norm": 0.8462554216384888, "learning_rate": 4.3931992525206625e-05, "loss": 0.5671, "step": 6058 }, { "epoch": 1.169916972388492, "grad_norm": 0.8523412942886353, "learning_rate": 4.391526699745403e-05, "loss": 0.692, "step": 6059 }, { "epoch": 1.1701100598571152, "grad_norm": 0.8854199051856995, "learning_rate": 4.389854216077306e-05, "loss": 0.6433, "step": 6060 }, { "epoch": 1.1703031473257386, "grad_norm": 0.7338806390762329, "learning_rate": 4.3881818017063275e-05, "loss": 0.6409, "step": 6061 }, { "epoch": 1.170496234794362, "grad_norm": 0.977124035358429, "learning_rate": 4.386509456822406e-05, "loss": 0.6147, "step": 6062 }, { "epoch": 1.170689322262985, "grad_norm": 0.7163652181625366, "learning_rate": 4.3848371816154814e-05, "loss": 0.595, "step": 6063 }, { "epoch": 1.1708824097316084, "grad_norm": 0.6837709546089172, "learning_rate": 4.38316497627548e-05, "loss": 0.5704, "step": 6064 }, { "epoch": 1.1710754972002317, "grad_norm": 1.359569787979126, "learning_rate": 4.3814928409923224e-05, "loss": 0.6256, "step": 6065 }, { "epoch": 1.171268584668855, "grad_norm": 2.3901617527008057, "learning_rate": 4.379820775955922e-05, "loss": 0.6487, "step": 6066 }, { "epoch": 1.1714616721374782, "grad_norm": 0.661734938621521, "learning_rate": 4.378148781356183e-05, "loss": 0.5592, "step": 6067 }, { "epoch": 1.1716547596061015, "grad_norm": 2.0273091793060303, "learning_rate": 4.376476857382998e-05, "loss": 0.6161, "step": 6068 }, { "epoch": 1.171847847074725, "grad_norm": 0.5609963536262512, "learning_rate": 4.374805004226259e-05, "loss": 0.605, "step": 6069 }, { "epoch": 1.172040934543348, "grad_norm": 0.9644253849983215, "learning_rate": 4.3731332220758463e-05, "loss": 0.611, "step": 6070 }, { "epoch": 1.1722340220119714, "grad_norm": 0.8251038789749146, "learning_rate": 4.37146151112163e-05, "loss": 0.6098, "step": 6071 }, { "epoch": 1.1724271094805947, "grad_norm": 0.8389576077461243, "learning_rate": 4.369789871553474e-05, "loss": 0.6145, "step": 6072 }, { "epoch": 1.172620196949218, "grad_norm": 0.6464914679527283, "learning_rate": 4.368118303561239e-05, "loss": 0.588, "step": 6073 }, { "epoch": 1.1728132844178414, "grad_norm": 0.6077699661254883, "learning_rate": 4.366446807334767e-05, "loss": 0.6382, "step": 6074 }, { "epoch": 1.1730063718864645, "grad_norm": 0.7274155616760254, "learning_rate": 4.3647753830638994e-05, "loss": 0.6898, "step": 6075 }, { "epoch": 1.1731994593550878, "grad_norm": 1.0688508749008179, "learning_rate": 4.363104030938467e-05, "loss": 0.5928, "step": 6076 }, { "epoch": 1.1733925468237112, "grad_norm": 0.8401250243186951, "learning_rate": 4.361432751148297e-05, "loss": 0.6306, "step": 6077 }, { "epoch": 1.1735856342923343, "grad_norm": 0.7580282688140869, "learning_rate": 4.3597615438832014e-05, "loss": 0.5792, "step": 6078 }, { "epoch": 1.1737787217609577, "grad_norm": 0.7423497438430786, "learning_rate": 4.358090409332986e-05, "loss": 0.641, "step": 6079 }, { "epoch": 1.173971809229581, "grad_norm": 0.8596205115318298, "learning_rate": 4.3564193476874494e-05, "loss": 0.6022, "step": 6080 }, { "epoch": 1.1741648966982043, "grad_norm": 1.2056288719177246, "learning_rate": 4.3547483591363855e-05, "loss": 0.6061, "step": 6081 }, { "epoch": 1.1743579841668277, "grad_norm": 0.8423870801925659, "learning_rate": 4.353077443869572e-05, "loss": 0.625, "step": 6082 }, { "epoch": 1.1745510716354508, "grad_norm": 0.723503053188324, "learning_rate": 4.351406602076785e-05, "loss": 0.6482, "step": 6083 }, { "epoch": 1.1747441591040741, "grad_norm": 0.8981224894523621, "learning_rate": 4.3497358339477917e-05, "loss": 0.6254, "step": 6084 }, { "epoch": 1.1749372465726975, "grad_norm": 0.5794868469238281, "learning_rate": 4.348065139672346e-05, "loss": 0.6075, "step": 6085 }, { "epoch": 1.1751303340413206, "grad_norm": 0.5973570942878723, "learning_rate": 4.346394519440195e-05, "loss": 0.5929, "step": 6086 }, { "epoch": 1.175323421509944, "grad_norm": 0.7054926753044128, "learning_rate": 4.3447239734410816e-05, "loss": 0.6025, "step": 6087 }, { "epoch": 1.1755165089785673, "grad_norm": 0.7501831650733948, "learning_rate": 4.343053501864738e-05, "loss": 0.6069, "step": 6088 }, { "epoch": 1.1757095964471906, "grad_norm": 0.7938430309295654, "learning_rate": 4.3413831049008865e-05, "loss": 0.5802, "step": 6089 }, { "epoch": 1.175902683915814, "grad_norm": 0.8107612133026123, "learning_rate": 4.3397127827392434e-05, "loss": 0.5666, "step": 6090 }, { "epoch": 1.176095771384437, "grad_norm": 0.7361031174659729, "learning_rate": 4.338042535569511e-05, "loss": 0.6634, "step": 6091 }, { "epoch": 1.1762888588530604, "grad_norm": 0.9621207118034363, "learning_rate": 4.336372363581391e-05, "loss": 0.6161, "step": 6092 }, { "epoch": 1.1764819463216838, "grad_norm": 0.7130942940711975, "learning_rate": 4.3347022669645705e-05, "loss": 0.5231, "step": 6093 }, { "epoch": 1.176675033790307, "grad_norm": 0.7885267734527588, "learning_rate": 4.3330322459087305e-05, "loss": 0.642, "step": 6094 }, { "epoch": 1.1768681212589303, "grad_norm": 0.7203328609466553, "learning_rate": 4.3313623006035456e-05, "loss": 0.6582, "step": 6095 }, { "epoch": 1.1770612087275536, "grad_norm": 0.919635534286499, "learning_rate": 4.329692431238677e-05, "loss": 0.6722, "step": 6096 }, { "epoch": 1.177254296196177, "grad_norm": 0.9748990535736084, "learning_rate": 4.328022638003779e-05, "loss": 0.677, "step": 6097 }, { "epoch": 1.1774473836648, "grad_norm": 0.991586446762085, "learning_rate": 4.3263529210884976e-05, "loss": 0.6655, "step": 6098 }, { "epoch": 1.1776404711334234, "grad_norm": 0.6375390887260437, "learning_rate": 4.3246832806824735e-05, "loss": 0.6333, "step": 6099 }, { "epoch": 1.1778335586020467, "grad_norm": 1.697863221168518, "learning_rate": 4.323013716975332e-05, "loss": 0.6372, "step": 6100 }, { "epoch": 1.17802664607067, "grad_norm": 0.6734786629676819, "learning_rate": 4.321344230156698e-05, "loss": 0.5365, "step": 6101 }, { "epoch": 1.1782197335392932, "grad_norm": 0.9441754221916199, "learning_rate": 4.319674820416177e-05, "loss": 0.5936, "step": 6102 }, { "epoch": 1.1784128210079166, "grad_norm": 0.6559448838233948, "learning_rate": 4.318005487943376e-05, "loss": 0.5817, "step": 6103 }, { "epoch": 1.17860590847654, "grad_norm": 1.276342749595642, "learning_rate": 4.316336232927886e-05, "loss": 0.6526, "step": 6104 }, { "epoch": 1.1787989959451632, "grad_norm": 1.0641690492630005, "learning_rate": 4.314667055559294e-05, "loss": 0.6084, "step": 6105 }, { "epoch": 1.1789920834137864, "grad_norm": 0.8691122531890869, "learning_rate": 4.3129979560271764e-05, "loss": 0.629, "step": 6106 }, { "epoch": 1.1791851708824097, "grad_norm": 1.0235583782196045, "learning_rate": 4.3113289345211004e-05, "loss": 0.5799, "step": 6107 }, { "epoch": 1.179378258351033, "grad_norm": 2.5167136192321777, "learning_rate": 4.3096599912306235e-05, "loss": 0.5564, "step": 6108 }, { "epoch": 1.1795713458196564, "grad_norm": 0.8973895311355591, "learning_rate": 4.307991126345297e-05, "loss": 0.6833, "step": 6109 }, { "epoch": 1.1797644332882795, "grad_norm": 0.6833648085594177, "learning_rate": 4.3063223400546594e-05, "loss": 0.6671, "step": 6110 }, { "epoch": 1.1799575207569029, "grad_norm": 0.6702916622161865, "learning_rate": 4.304653632548244e-05, "loss": 0.589, "step": 6111 }, { "epoch": 1.1801506082255262, "grad_norm": 0.8202188014984131, "learning_rate": 4.302985004015575e-05, "loss": 0.597, "step": 6112 }, { "epoch": 1.1803436956941495, "grad_norm": 0.6444829106330872, "learning_rate": 4.301316454646167e-05, "loss": 0.6234, "step": 6113 }, { "epoch": 1.1805367831627727, "grad_norm": 0.5018777251243591, "learning_rate": 4.29964798462952e-05, "loss": 0.631, "step": 6114 }, { "epoch": 1.180729870631396, "grad_norm": 0.5980693101882935, "learning_rate": 4.297979594155134e-05, "loss": 0.5854, "step": 6115 }, { "epoch": 1.1809229581000193, "grad_norm": 0.948625922203064, "learning_rate": 4.296311283412495e-05, "loss": 0.6814, "step": 6116 }, { "epoch": 1.1811160455686427, "grad_norm": 1.1459721326828003, "learning_rate": 4.2946430525910805e-05, "loss": 0.5738, "step": 6117 }, { "epoch": 1.1813091330372658, "grad_norm": 0.9188706278800964, "learning_rate": 4.292974901880362e-05, "loss": 0.6618, "step": 6118 }, { "epoch": 1.1815022205058892, "grad_norm": 0.6653585433959961, "learning_rate": 4.291306831469795e-05, "loss": 0.6284, "step": 6119 }, { "epoch": 1.1816953079745125, "grad_norm": 1.1892904043197632, "learning_rate": 4.289638841548833e-05, "loss": 0.6221, "step": 6120 }, { "epoch": 1.1818883954431358, "grad_norm": 0.7983202934265137, "learning_rate": 4.287970932306915e-05, "loss": 0.5387, "step": 6121 }, { "epoch": 1.182081482911759, "grad_norm": 1.0310790538787842, "learning_rate": 4.286303103933474e-05, "loss": 0.6578, "step": 6122 }, { "epoch": 1.1822745703803823, "grad_norm": 0.6572598814964294, "learning_rate": 4.2846353566179355e-05, "loss": 0.602, "step": 6123 }, { "epoch": 1.1824676578490056, "grad_norm": 0.8457106947898865, "learning_rate": 4.282967690549712e-05, "loss": 0.597, "step": 6124 }, { "epoch": 1.1826607453176288, "grad_norm": 0.8006728887557983, "learning_rate": 4.2813001059182064e-05, "loss": 0.6709, "step": 6125 }, { "epoch": 1.1828538327862521, "grad_norm": 0.7385476231575012, "learning_rate": 4.2796326029128155e-05, "loss": 0.6519, "step": 6126 }, { "epoch": 1.1830469202548755, "grad_norm": 0.533185601234436, "learning_rate": 4.277965181722926e-05, "loss": 0.6247, "step": 6127 }, { "epoch": 1.1832400077234988, "grad_norm": 1.0251715183258057, "learning_rate": 4.276297842537913e-05, "loss": 0.5268, "step": 6128 }, { "epoch": 1.1834330951921221, "grad_norm": 0.7842078804969788, "learning_rate": 4.274630585547146e-05, "loss": 0.6773, "step": 6129 }, { "epoch": 1.1836261826607453, "grad_norm": 0.5121485590934753, "learning_rate": 4.272963410939983e-05, "loss": 0.5939, "step": 6130 }, { "epoch": 1.1838192701293686, "grad_norm": 0.8262165784835815, "learning_rate": 4.271296318905773e-05, "loss": 0.6085, "step": 6131 }, { "epoch": 1.184012357597992, "grad_norm": 0.8128295540809631, "learning_rate": 4.2696293096338526e-05, "loss": 0.6078, "step": 6132 }, { "epoch": 1.184205445066615, "grad_norm": 0.8046287298202515, "learning_rate": 4.2679623833135535e-05, "loss": 0.6078, "step": 6133 }, { "epoch": 1.1843985325352384, "grad_norm": 0.8740498423576355, "learning_rate": 4.266295540134198e-05, "loss": 0.5598, "step": 6134 }, { "epoch": 1.1845916200038618, "grad_norm": 0.7013381123542786, "learning_rate": 4.264628780285095e-05, "loss": 0.6156, "step": 6135 }, { "epoch": 1.184784707472485, "grad_norm": 1.3222384452819824, "learning_rate": 4.2629621039555504e-05, "loss": 0.6206, "step": 6136 }, { "epoch": 1.1849777949411084, "grad_norm": 0.7768621444702148, "learning_rate": 4.2612955113348496e-05, "loss": 0.6349, "step": 6137 }, { "epoch": 1.1851708824097316, "grad_norm": 0.9246380925178528, "learning_rate": 4.259629002612281e-05, "loss": 0.6035, "step": 6138 }, { "epoch": 1.185363969878355, "grad_norm": 1.0683032274246216, "learning_rate": 4.257962577977114e-05, "loss": 0.5441, "step": 6139 }, { "epoch": 1.1855570573469782, "grad_norm": 0.5814318060874939, "learning_rate": 4.256296237618616e-05, "loss": 0.652, "step": 6140 }, { "epoch": 1.1857501448156014, "grad_norm": 0.7339361310005188, "learning_rate": 4.254629981726038e-05, "loss": 0.6338, "step": 6141 }, { "epoch": 1.1859432322842247, "grad_norm": 0.6813066601753235, "learning_rate": 4.2529638104886264e-05, "loss": 0.5355, "step": 6142 }, { "epoch": 1.186136319752848, "grad_norm": 1.0000169277191162, "learning_rate": 4.251297724095614e-05, "loss": 0.5702, "step": 6143 }, { "epoch": 1.1863294072214714, "grad_norm": 0.6957909464836121, "learning_rate": 4.249631722736229e-05, "loss": 0.5723, "step": 6144 }, { "epoch": 1.1865224946900945, "grad_norm": 1.3282631635665894, "learning_rate": 4.247965806599684e-05, "loss": 0.5559, "step": 6145 }, { "epoch": 1.1867155821587179, "grad_norm": 0.7548748254776001, "learning_rate": 4.246299975875187e-05, "loss": 0.6166, "step": 6146 }, { "epoch": 1.1869086696273412, "grad_norm": 0.9644141793251038, "learning_rate": 4.244634230751934e-05, "loss": 0.5868, "step": 6147 }, { "epoch": 1.1871017570959645, "grad_norm": 1.4094312191009521, "learning_rate": 4.24296857141911e-05, "loss": 0.6188, "step": 6148 }, { "epoch": 1.1872948445645877, "grad_norm": 1.098458170890808, "learning_rate": 4.241302998065892e-05, "loss": 0.6269, "step": 6149 }, { "epoch": 1.187487932033211, "grad_norm": 0.8401152491569519, "learning_rate": 4.2396375108814485e-05, "loss": 0.5939, "step": 6150 }, { "epoch": 1.1876810195018344, "grad_norm": 0.622612476348877, "learning_rate": 4.2379721100549364e-05, "loss": 0.6362, "step": 6151 }, { "epoch": 1.1878741069704577, "grad_norm": 1.1220773458480835, "learning_rate": 4.236306795775501e-05, "loss": 0.6137, "step": 6152 }, { "epoch": 1.1880671944390808, "grad_norm": 0.7525527477264404, "learning_rate": 4.2346415682322836e-05, "loss": 0.6649, "step": 6153 }, { "epoch": 1.1882602819077042, "grad_norm": 1.0817945003509521, "learning_rate": 4.2329764276144075e-05, "loss": 0.61, "step": 6154 }, { "epoch": 1.1884533693763275, "grad_norm": 1.0596753358840942, "learning_rate": 4.2313113741109934e-05, "loss": 0.6224, "step": 6155 }, { "epoch": 1.1886464568449508, "grad_norm": 1.1922484636306763, "learning_rate": 4.229646407911148e-05, "loss": 0.5839, "step": 6156 }, { "epoch": 1.188839544313574, "grad_norm": 1.0587923526763916, "learning_rate": 4.227981529203969e-05, "loss": 0.6464, "step": 6157 }, { "epoch": 1.1890326317821973, "grad_norm": 0.7439820766448975, "learning_rate": 4.2263167381785487e-05, "loss": 0.6319, "step": 6158 }, { "epoch": 1.1892257192508207, "grad_norm": 1.3722989559173584, "learning_rate": 4.22465203502396e-05, "loss": 0.6294, "step": 6159 }, { "epoch": 1.189418806719444, "grad_norm": 0.8725578784942627, "learning_rate": 4.222987419929271e-05, "loss": 0.6314, "step": 6160 }, { "epoch": 1.1896118941880671, "grad_norm": 1.2201924324035645, "learning_rate": 4.221322893083543e-05, "loss": 0.637, "step": 6161 }, { "epoch": 1.1898049816566905, "grad_norm": 0.6605836153030396, "learning_rate": 4.219658454675823e-05, "loss": 0.6206, "step": 6162 }, { "epoch": 1.1899980691253138, "grad_norm": 0.7190247774124146, "learning_rate": 4.2179941048951475e-05, "loss": 0.6291, "step": 6163 }, { "epoch": 1.1901911565939371, "grad_norm": 0.7940541505813599, "learning_rate": 4.216329843930549e-05, "loss": 0.5945, "step": 6164 }, { "epoch": 1.1903842440625603, "grad_norm": 0.7373538017272949, "learning_rate": 4.214665671971039e-05, "loss": 0.636, "step": 6165 }, { "epoch": 1.1905773315311836, "grad_norm": 0.7638929486274719, "learning_rate": 4.2130015892056305e-05, "loss": 0.6217, "step": 6166 }, { "epoch": 1.190770418999807, "grad_norm": 1.7033464908599854, "learning_rate": 4.211337595823318e-05, "loss": 0.5495, "step": 6167 }, { "epoch": 1.1909635064684303, "grad_norm": 0.786403238773346, "learning_rate": 4.20967369201309e-05, "loss": 0.5911, "step": 6168 }, { "epoch": 1.1911565939370534, "grad_norm": 1.286605954170227, "learning_rate": 4.208009877963926e-05, "loss": 0.602, "step": 6169 }, { "epoch": 1.1913496814056768, "grad_norm": 0.7330323457717896, "learning_rate": 4.206346153864791e-05, "loss": 0.6442, "step": 6170 }, { "epoch": 1.1915427688743, "grad_norm": 0.6583141088485718, "learning_rate": 4.204682519904641e-05, "loss": 0.6613, "step": 6171 }, { "epoch": 1.1917358563429232, "grad_norm": 1.5548129081726074, "learning_rate": 4.2030189762724234e-05, "loss": 0.6337, "step": 6172 }, { "epoch": 1.1919289438115466, "grad_norm": 0.6531593203544617, "learning_rate": 4.201355523157077e-05, "loss": 0.6089, "step": 6173 }, { "epoch": 1.19212203128017, "grad_norm": 1.0872517824172974, "learning_rate": 4.199692160747524e-05, "loss": 0.6588, "step": 6174 }, { "epoch": 1.1923151187487933, "grad_norm": 0.8300336599349976, "learning_rate": 4.198028889232683e-05, "loss": 0.6357, "step": 6175 }, { "epoch": 1.1925082062174166, "grad_norm": 0.49858570098876953, "learning_rate": 4.1963657088014606e-05, "loss": 0.5943, "step": 6176 }, { "epoch": 1.1927012936860397, "grad_norm": 0.8800646066665649, "learning_rate": 4.1947026196427504e-05, "loss": 0.6468, "step": 6177 }, { "epoch": 1.192894381154663, "grad_norm": 0.6015917658805847, "learning_rate": 4.193039621945435e-05, "loss": 0.6052, "step": 6178 }, { "epoch": 1.1930874686232864, "grad_norm": 0.6873188018798828, "learning_rate": 4.191376715898391e-05, "loss": 0.7496, "step": 6179 }, { "epoch": 1.1932805560919095, "grad_norm": 0.6284417510032654, "learning_rate": 4.189713901690484e-05, "loss": 0.6263, "step": 6180 }, { "epoch": 1.1934736435605329, "grad_norm": 0.6101931929588318, "learning_rate": 4.188051179510568e-05, "loss": 0.6927, "step": 6181 }, { "epoch": 1.1936667310291562, "grad_norm": 0.5736600756645203, "learning_rate": 4.1863885495474806e-05, "loss": 0.6522, "step": 6182 }, { "epoch": 1.1938598184977796, "grad_norm": 0.6082772612571716, "learning_rate": 4.184726011990058e-05, "loss": 0.6197, "step": 6183 }, { "epoch": 1.194052905966403, "grad_norm": 0.6041532754898071, "learning_rate": 4.183063567027125e-05, "loss": 0.623, "step": 6184 }, { "epoch": 1.194245993435026, "grad_norm": 0.7317903637886047, "learning_rate": 4.1814012148474877e-05, "loss": 0.6473, "step": 6185 }, { "epoch": 1.1944390809036494, "grad_norm": 0.9863235950469971, "learning_rate": 4.179738955639952e-05, "loss": 0.6113, "step": 6186 }, { "epoch": 1.1946321683722727, "grad_norm": 1.020392656326294, "learning_rate": 4.1780767895933075e-05, "loss": 0.6357, "step": 6187 }, { "epoch": 1.1948252558408958, "grad_norm": 0.8589979410171509, "learning_rate": 4.176414716896333e-05, "loss": 0.6408, "step": 6188 }, { "epoch": 1.1950183433095192, "grad_norm": 1.6270490884780884, "learning_rate": 4.174752737737797e-05, "loss": 0.5992, "step": 6189 }, { "epoch": 1.1952114307781425, "grad_norm": 0.913092851638794, "learning_rate": 4.17309085230646e-05, "loss": 0.6366, "step": 6190 }, { "epoch": 1.1954045182467659, "grad_norm": 0.6275487542152405, "learning_rate": 4.17142906079107e-05, "loss": 0.609, "step": 6191 }, { "epoch": 1.195597605715389, "grad_norm": 0.7310360670089722, "learning_rate": 4.169767363380363e-05, "loss": 0.5672, "step": 6192 }, { "epoch": 1.1957906931840123, "grad_norm": 0.7046099305152893, "learning_rate": 4.1681057602630696e-05, "loss": 0.5779, "step": 6193 }, { "epoch": 1.1959837806526357, "grad_norm": 0.8699751496315002, "learning_rate": 4.166444251627902e-05, "loss": 0.5991, "step": 6194 }, { "epoch": 1.196176868121259, "grad_norm": 2.2564218044281006, "learning_rate": 4.164782837663565e-05, "loss": 0.6736, "step": 6195 }, { "epoch": 1.1963699555898821, "grad_norm": 0.6822742223739624, "learning_rate": 4.163121518558755e-05, "loss": 0.612, "step": 6196 }, { "epoch": 1.1965630430585055, "grad_norm": 0.6666250824928284, "learning_rate": 4.1614602945021566e-05, "loss": 0.6303, "step": 6197 }, { "epoch": 1.1967561305271288, "grad_norm": 0.7038009166717529, "learning_rate": 4.159799165682441e-05, "loss": 0.6678, "step": 6198 }, { "epoch": 1.1969492179957522, "grad_norm": 0.8885790705680847, "learning_rate": 4.1581381322882735e-05, "loss": 0.6123, "step": 6199 }, { "epoch": 1.1971423054643753, "grad_norm": 0.743656575679779, "learning_rate": 4.1564771945083e-05, "loss": 0.5902, "step": 6200 }, { "epoch": 1.1973353929329986, "grad_norm": 0.882036030292511, "learning_rate": 4.154816352531167e-05, "loss": 0.587, "step": 6201 }, { "epoch": 1.197528480401622, "grad_norm": 1.6437268257141113, "learning_rate": 4.1531556065454986e-05, "loss": 0.6603, "step": 6202 }, { "epoch": 1.1977215678702453, "grad_norm": 1.2374767065048218, "learning_rate": 4.151494956739917e-05, "loss": 0.6415, "step": 6203 }, { "epoch": 1.1979146553388684, "grad_norm": 1.0655325651168823, "learning_rate": 4.14983440330303e-05, "loss": 0.5795, "step": 6204 }, { "epoch": 1.1981077428074918, "grad_norm": 0.9371628761291504, "learning_rate": 4.1481739464234336e-05, "loss": 0.5885, "step": 6205 }, { "epoch": 1.1983008302761151, "grad_norm": 0.8057360649108887, "learning_rate": 4.146513586289712e-05, "loss": 0.6069, "step": 6206 }, { "epoch": 1.1984939177447385, "grad_norm": 1.2556819915771484, "learning_rate": 4.144853323090442e-05, "loss": 0.594, "step": 6207 }, { "epoch": 1.1986870052133616, "grad_norm": 1.072434663772583, "learning_rate": 4.143193157014188e-05, "loss": 0.5974, "step": 6208 }, { "epoch": 1.198880092681985, "grad_norm": 0.7380354404449463, "learning_rate": 4.141533088249501e-05, "loss": 0.5805, "step": 6209 }, { "epoch": 1.1990731801506083, "grad_norm": 0.8349250555038452, "learning_rate": 4.139873116984926e-05, "loss": 0.6618, "step": 6210 }, { "epoch": 1.1992662676192316, "grad_norm": 0.8609845638275146, "learning_rate": 4.138213243408989e-05, "loss": 0.6484, "step": 6211 }, { "epoch": 1.1994593550878547, "grad_norm": 0.625935435295105, "learning_rate": 4.1365534677102134e-05, "loss": 0.6188, "step": 6212 }, { "epoch": 1.199652442556478, "grad_norm": 0.990063488483429, "learning_rate": 4.1348937900771056e-05, "loss": 0.5931, "step": 6213 }, { "epoch": 1.1998455300251014, "grad_norm": 0.8729775547981262, "learning_rate": 4.1332342106981636e-05, "loss": 0.6183, "step": 6214 }, { "epoch": 1.2000386174937248, "grad_norm": 1.2144215106964111, "learning_rate": 4.131574729761875e-05, "loss": 0.5846, "step": 6215 }, { "epoch": 1.2002317049623479, "grad_norm": 1.0137851238250732, "learning_rate": 4.129915347456715e-05, "loss": 0.5608, "step": 6216 }, { "epoch": 1.2004247924309712, "grad_norm": 0.7452346086502075, "learning_rate": 4.128256063971143e-05, "loss": 0.6068, "step": 6217 }, { "epoch": 1.2006178798995946, "grad_norm": 2.2382943630218506, "learning_rate": 4.126596879493615e-05, "loss": 0.6433, "step": 6218 }, { "epoch": 1.2008109673682177, "grad_norm": 1.1355812549591064, "learning_rate": 4.124937794212574e-05, "loss": 0.6898, "step": 6219 }, { "epoch": 1.201004054836841, "grad_norm": 0.8921751379966736, "learning_rate": 4.1232788083164456e-05, "loss": 0.5754, "step": 6220 }, { "epoch": 1.2011971423054644, "grad_norm": 0.6932026147842407, "learning_rate": 4.121619921993655e-05, "loss": 0.5718, "step": 6221 }, { "epoch": 1.2013902297740877, "grad_norm": 1.4025444984436035, "learning_rate": 4.119961135432602e-05, "loss": 0.5659, "step": 6222 }, { "epoch": 1.201583317242711, "grad_norm": 0.665335476398468, "learning_rate": 4.118302448821688e-05, "loss": 0.6165, "step": 6223 }, { "epoch": 1.2017764047113342, "grad_norm": 0.7549969553947449, "learning_rate": 4.116643862349295e-05, "loss": 0.6327, "step": 6224 }, { "epoch": 1.2019694921799575, "grad_norm": 0.9927385449409485, "learning_rate": 4.1149853762038e-05, "loss": 0.6235, "step": 6225 }, { "epoch": 1.2021625796485809, "grad_norm": 0.7877573370933533, "learning_rate": 4.113326990573561e-05, "loss": 0.657, "step": 6226 }, { "epoch": 1.202355667117204, "grad_norm": 1.1694962978363037, "learning_rate": 4.1116687056469325e-05, "loss": 0.6449, "step": 6227 }, { "epoch": 1.2025487545858273, "grad_norm": 0.662295937538147, "learning_rate": 4.11001052161225e-05, "loss": 0.5573, "step": 6228 }, { "epoch": 1.2027418420544507, "grad_norm": 1.0344916582107544, "learning_rate": 4.108352438657843e-05, "loss": 0.57, "step": 6229 }, { "epoch": 1.202934929523074, "grad_norm": 0.7881894111633301, "learning_rate": 4.1066944569720274e-05, "loss": 0.5869, "step": 6230 }, { "epoch": 1.2031280169916974, "grad_norm": 0.9108747243881226, "learning_rate": 4.105036576743108e-05, "loss": 0.5622, "step": 6231 }, { "epoch": 1.2033211044603205, "grad_norm": 5.35439920425415, "learning_rate": 4.103378798159379e-05, "loss": 0.6746, "step": 6232 }, { "epoch": 1.2035141919289438, "grad_norm": 1.0431967973709106, "learning_rate": 4.101721121409123e-05, "loss": 0.6117, "step": 6233 }, { "epoch": 1.2037072793975672, "grad_norm": 0.6357342004776001, "learning_rate": 4.100063546680606e-05, "loss": 0.6074, "step": 6234 }, { "epoch": 1.2039003668661903, "grad_norm": 0.7643981575965881, "learning_rate": 4.0984060741620886e-05, "loss": 0.5615, "step": 6235 }, { "epoch": 1.2040934543348136, "grad_norm": 1.0103312730789185, "learning_rate": 4.09674870404182e-05, "loss": 0.6183, "step": 6236 }, { "epoch": 1.204286541803437, "grad_norm": 0.8580048680305481, "learning_rate": 4.0950914365080314e-05, "loss": 0.6252, "step": 6237 }, { "epoch": 1.2044796292720603, "grad_norm": 0.603587806224823, "learning_rate": 4.093434271748949e-05, "loss": 0.5969, "step": 6238 }, { "epoch": 1.2046727167406834, "grad_norm": 0.7866556644439697, "learning_rate": 4.091777209952786e-05, "loss": 0.5737, "step": 6239 }, { "epoch": 1.2048658042093068, "grad_norm": 0.9703025221824646, "learning_rate": 4.090120251307741e-05, "loss": 0.6552, "step": 6240 }, { "epoch": 1.2050588916779301, "grad_norm": 0.7350534796714783, "learning_rate": 4.088463396002001e-05, "loss": 0.6155, "step": 6241 }, { "epoch": 1.2052519791465535, "grad_norm": 0.704934298992157, "learning_rate": 4.086806644223744e-05, "loss": 0.6112, "step": 6242 }, { "epoch": 1.2054450666151766, "grad_norm": 1.0965310335159302, "learning_rate": 4.085149996161137e-05, "loss": 0.6586, "step": 6243 }, { "epoch": 1.2056381540838, "grad_norm": 1.2786093950271606, "learning_rate": 4.083493452002333e-05, "loss": 0.5853, "step": 6244 }, { "epoch": 1.2058312415524233, "grad_norm": 1.07161545753479, "learning_rate": 4.08183701193547e-05, "loss": 0.6541, "step": 6245 }, { "epoch": 1.2060243290210466, "grad_norm": 0.5938099026679993, "learning_rate": 4.0801806761486796e-05, "loss": 0.5995, "step": 6246 }, { "epoch": 1.2062174164896697, "grad_norm": 0.7126007080078125, "learning_rate": 4.078524444830081e-05, "loss": 0.6133, "step": 6247 }, { "epoch": 1.206410503958293, "grad_norm": 1.3781265020370483, "learning_rate": 4.076868318167779e-05, "loss": 0.5694, "step": 6248 }, { "epoch": 1.2066035914269164, "grad_norm": 1.0600876808166504, "learning_rate": 4.075212296349866e-05, "loss": 0.6841, "step": 6249 }, { "epoch": 1.2067966788955398, "grad_norm": 0.7773905992507935, "learning_rate": 4.0735563795644294e-05, "loss": 0.6679, "step": 6250 }, { "epoch": 1.2069897663641629, "grad_norm": 1.2125520706176758, "learning_rate": 4.071900567999534e-05, "loss": 0.5817, "step": 6251 }, { "epoch": 1.2071828538327862, "grad_norm": 1.340951681137085, "learning_rate": 4.07024486184324e-05, "loss": 0.647, "step": 6252 }, { "epoch": 1.2073759413014096, "grad_norm": 0.674220621585846, "learning_rate": 4.068589261283592e-05, "loss": 0.6109, "step": 6253 }, { "epoch": 1.207569028770033, "grad_norm": 0.95135498046875, "learning_rate": 4.066933766508628e-05, "loss": 0.5881, "step": 6254 }, { "epoch": 1.207762116238656, "grad_norm": 1.7058978080749512, "learning_rate": 4.065278377706368e-05, "loss": 0.5678, "step": 6255 }, { "epoch": 1.2079552037072794, "grad_norm": 0.8594673871994019, "learning_rate": 4.063623095064824e-05, "loss": 0.6424, "step": 6256 }, { "epoch": 1.2081482911759027, "grad_norm": 0.8003299832344055, "learning_rate": 4.0619679187719905e-05, "loss": 0.6293, "step": 6257 }, { "epoch": 1.208341378644526, "grad_norm": 0.9190576076507568, "learning_rate": 4.0603128490158574e-05, "loss": 0.6194, "step": 6258 }, { "epoch": 1.2085344661131492, "grad_norm": 1.5331709384918213, "learning_rate": 4.058657885984396e-05, "loss": 0.6111, "step": 6259 }, { "epoch": 1.2087275535817725, "grad_norm": 1.1405130624771118, "learning_rate": 4.0570030298655694e-05, "loss": 0.6097, "step": 6260 }, { "epoch": 1.2089206410503959, "grad_norm": 1.0705375671386719, "learning_rate": 4.055348280847328e-05, "loss": 0.6788, "step": 6261 }, { "epoch": 1.2091137285190192, "grad_norm": 0.8820477724075317, "learning_rate": 4.0536936391176105e-05, "loss": 0.5618, "step": 6262 }, { "epoch": 1.2093068159876423, "grad_norm": 0.9298886656761169, "learning_rate": 4.052039104864338e-05, "loss": 0.6451, "step": 6263 }, { "epoch": 1.2094999034562657, "grad_norm": 0.7890244126319885, "learning_rate": 4.050384678275427e-05, "loss": 0.6216, "step": 6264 }, { "epoch": 1.209692990924889, "grad_norm": 0.7014052271842957, "learning_rate": 4.048730359538778e-05, "loss": 0.6096, "step": 6265 }, { "epoch": 1.2098860783935121, "grad_norm": 1.0682861804962158, "learning_rate": 4.047076148842279e-05, "loss": 0.6275, "step": 6266 }, { "epoch": 1.2100791658621355, "grad_norm": 3.2239010334014893, "learning_rate": 4.0454220463738095e-05, "loss": 0.5983, "step": 6267 }, { "epoch": 1.2102722533307588, "grad_norm": 0.9141876101493835, "learning_rate": 4.0437680523212285e-05, "loss": 0.6322, "step": 6268 }, { "epoch": 1.2104653407993822, "grad_norm": 1.0358208417892456, "learning_rate": 4.042114166872392e-05, "loss": 0.6173, "step": 6269 }, { "epoch": 1.2106584282680055, "grad_norm": 0.746645987033844, "learning_rate": 4.0404603902151364e-05, "loss": 0.6149, "step": 6270 }, { "epoch": 1.2108515157366286, "grad_norm": 0.7890833616256714, "learning_rate": 4.038806722537292e-05, "loss": 0.5782, "step": 6271 }, { "epoch": 1.211044603205252, "grad_norm": 1.2036957740783691, "learning_rate": 4.0371531640266705e-05, "loss": 0.6457, "step": 6272 }, { "epoch": 1.2112376906738753, "grad_norm": 0.7715213298797607, "learning_rate": 4.035499714871079e-05, "loss": 0.6212, "step": 6273 }, { "epoch": 1.2114307781424984, "grad_norm": 0.6569154858589172, "learning_rate": 4.033846375258301e-05, "loss": 0.6382, "step": 6274 }, { "epoch": 1.2116238656111218, "grad_norm": 0.6067822575569153, "learning_rate": 4.03219314537612e-05, "loss": 0.613, "step": 6275 }, { "epoch": 1.2118169530797451, "grad_norm": 0.6651844382286072, "learning_rate": 4.030540025412296e-05, "loss": 0.6206, "step": 6276 }, { "epoch": 1.2120100405483685, "grad_norm": 0.7759481072425842, "learning_rate": 4.028887015554584e-05, "loss": 0.6665, "step": 6277 }, { "epoch": 1.2122031280169918, "grad_norm": 0.9668580889701843, "learning_rate": 4.027234115990726e-05, "loss": 0.6483, "step": 6278 }, { "epoch": 1.212396215485615, "grad_norm": 1.1489595174789429, "learning_rate": 4.025581326908449e-05, "loss": 0.576, "step": 6279 }, { "epoch": 1.2125893029542383, "grad_norm": 1.4725534915924072, "learning_rate": 4.0239286484954634e-05, "loss": 0.5629, "step": 6280 }, { "epoch": 1.2127823904228616, "grad_norm": 0.8480228185653687, "learning_rate": 4.0222760809394755e-05, "loss": 0.6529, "step": 6281 }, { "epoch": 1.2129754778914847, "grad_norm": 1.8489391803741455, "learning_rate": 4.0206236244281754e-05, "loss": 0.6467, "step": 6282 }, { "epoch": 1.213168565360108, "grad_norm": 1.912155270576477, "learning_rate": 4.0189712791492386e-05, "loss": 0.685, "step": 6283 }, { "epoch": 1.2133616528287314, "grad_norm": 0.7051181793212891, "learning_rate": 4.017319045290332e-05, "loss": 0.6154, "step": 6284 }, { "epoch": 1.2135547402973548, "grad_norm": 1.60039484500885, "learning_rate": 4.015666923039105e-05, "loss": 0.6332, "step": 6285 }, { "epoch": 1.213747827765978, "grad_norm": 0.7398208379745483, "learning_rate": 4.014014912583198e-05, "loss": 0.6201, "step": 6286 }, { "epoch": 1.2139409152346012, "grad_norm": 0.7958271503448486, "learning_rate": 4.012363014110237e-05, "loss": 0.6241, "step": 6287 }, { "epoch": 1.2141340027032246, "grad_norm": 1.423573613166809, "learning_rate": 4.010711227807836e-05, "loss": 0.6233, "step": 6288 }, { "epoch": 1.214327090171848, "grad_norm": 0.7395715713500977, "learning_rate": 4.009059553863598e-05, "loss": 0.621, "step": 6289 }, { "epoch": 1.214520177640471, "grad_norm": 0.5254993438720703, "learning_rate": 4.007407992465111e-05, "loss": 0.626, "step": 6290 }, { "epoch": 1.2147132651090944, "grad_norm": 0.6356671452522278, "learning_rate": 4.0057565437999475e-05, "loss": 0.6108, "step": 6291 }, { "epoch": 1.2149063525777177, "grad_norm": 0.5640951991081238, "learning_rate": 4.004105208055671e-05, "loss": 0.6047, "step": 6292 }, { "epoch": 1.215099440046341, "grad_norm": 1.0649422407150269, "learning_rate": 4.002453985419835e-05, "loss": 0.6778, "step": 6293 }, { "epoch": 1.2152925275149642, "grad_norm": 0.8408175706863403, "learning_rate": 4.000802876079972e-05, "loss": 0.5619, "step": 6294 }, { "epoch": 1.2154856149835875, "grad_norm": 0.877461314201355, "learning_rate": 3.9991518802236086e-05, "loss": 0.6519, "step": 6295 }, { "epoch": 1.2156787024522109, "grad_norm": 0.6713468432426453, "learning_rate": 3.997500998038259e-05, "loss": 0.6073, "step": 6296 }, { "epoch": 1.2158717899208342, "grad_norm": 0.707756757736206, "learning_rate": 3.9958502297114166e-05, "loss": 0.6822, "step": 6297 }, { "epoch": 1.2160648773894573, "grad_norm": 0.8599418997764587, "learning_rate": 3.994199575430568e-05, "loss": 0.6153, "step": 6298 }, { "epoch": 1.2162579648580807, "grad_norm": 0.791455864906311, "learning_rate": 3.992549035383187e-05, "loss": 0.6307, "step": 6299 }, { "epoch": 1.216451052326704, "grad_norm": 0.6621142625808716, "learning_rate": 3.9908986097567335e-05, "loss": 0.6311, "step": 6300 }, { "epoch": 1.2166441397953274, "grad_norm": 1.5381190776824951, "learning_rate": 3.989248298738652e-05, "loss": 0.5995, "step": 6301 }, { "epoch": 1.2168372272639505, "grad_norm": 0.5547611117362976, "learning_rate": 3.98759810251638e-05, "loss": 0.5853, "step": 6302 }, { "epoch": 1.2170303147325738, "grad_norm": 0.7232505083084106, "learning_rate": 3.985948021277333e-05, "loss": 0.6237, "step": 6303 }, { "epoch": 1.2172234022011972, "grad_norm": 0.7105379104614258, "learning_rate": 3.984298055208923e-05, "loss": 0.6027, "step": 6304 }, { "epoch": 1.2174164896698205, "grad_norm": 1.1805341243743896, "learning_rate": 3.98264820449854e-05, "loss": 0.5927, "step": 6305 }, { "epoch": 1.2176095771384436, "grad_norm": 0.7578998804092407, "learning_rate": 3.980998469333568e-05, "loss": 0.5999, "step": 6306 }, { "epoch": 1.217802664607067, "grad_norm": 0.6455517411231995, "learning_rate": 3.9793488499013765e-05, "loss": 0.5954, "step": 6307 }, { "epoch": 1.2179957520756903, "grad_norm": 0.8330925107002258, "learning_rate": 3.9776993463893175e-05, "loss": 0.6706, "step": 6308 }, { "epoch": 1.2181888395443137, "grad_norm": 0.8028289079666138, "learning_rate": 3.9760499589847334e-05, "loss": 0.5876, "step": 6309 }, { "epoch": 1.2183819270129368, "grad_norm": 1.879310965538025, "learning_rate": 3.974400687874953e-05, "loss": 0.6578, "step": 6310 }, { "epoch": 1.2185750144815601, "grad_norm": 0.6710445284843445, "learning_rate": 3.9727515332472944e-05, "loss": 0.5489, "step": 6311 }, { "epoch": 1.2187681019501835, "grad_norm": 2.249288320541382, "learning_rate": 3.971102495289055e-05, "loss": 0.6903, "step": 6312 }, { "epoch": 1.2189611894188066, "grad_norm": 0.6844283938407898, "learning_rate": 3.96945357418753e-05, "loss": 0.6301, "step": 6313 }, { "epoch": 1.21915427688743, "grad_norm": 1.1294035911560059, "learning_rate": 3.967804770129989e-05, "loss": 0.6078, "step": 6314 }, { "epoch": 1.2193473643560533, "grad_norm": 1.0368592739105225, "learning_rate": 3.966156083303698e-05, "loss": 0.6285, "step": 6315 }, { "epoch": 1.2195404518246766, "grad_norm": 0.7653102874755859, "learning_rate": 3.964507513895903e-05, "loss": 0.5634, "step": 6316 }, { "epoch": 1.2197335392933, "grad_norm": 0.7435517311096191, "learning_rate": 3.9628590620938435e-05, "loss": 0.621, "step": 6317 }, { "epoch": 1.219926626761923, "grad_norm": 2.17110538482666, "learning_rate": 3.96121072808474e-05, "loss": 0.6647, "step": 6318 }, { "epoch": 1.2201197142305464, "grad_norm": 0.7142127156257629, "learning_rate": 3.959562512055803e-05, "loss": 0.6509, "step": 6319 }, { "epoch": 1.2203128016991698, "grad_norm": 0.6848883628845215, "learning_rate": 3.9579144141942246e-05, "loss": 0.5262, "step": 6320 }, { "epoch": 1.220505889167793, "grad_norm": 2.0607166290283203, "learning_rate": 3.9562664346871906e-05, "loss": 0.6274, "step": 6321 }, { "epoch": 1.2206989766364162, "grad_norm": 1.6617565155029297, "learning_rate": 3.9546185737218666e-05, "loss": 0.6487, "step": 6322 }, { "epoch": 1.2208920641050396, "grad_norm": 1.3590587377548218, "learning_rate": 3.9529708314854104e-05, "loss": 0.592, "step": 6323 }, { "epoch": 1.221085151573663, "grad_norm": 0.9098988175392151, "learning_rate": 3.951323208164964e-05, "loss": 0.7042, "step": 6324 }, { "epoch": 1.2212782390422863, "grad_norm": 0.881516695022583, "learning_rate": 3.949675703947655e-05, "loss": 0.6524, "step": 6325 }, { "epoch": 1.2214713265109094, "grad_norm": 0.8289263248443604, "learning_rate": 3.948028319020597e-05, "loss": 0.5478, "step": 6326 }, { "epoch": 1.2216644139795327, "grad_norm": 0.7826880216598511, "learning_rate": 3.946381053570891e-05, "loss": 0.6612, "step": 6327 }, { "epoch": 1.221857501448156, "grad_norm": 0.6538686752319336, "learning_rate": 3.9447339077856285e-05, "loss": 0.5996, "step": 6328 }, { "epoch": 1.2220505889167792, "grad_norm": 1.1026359796524048, "learning_rate": 3.9430868818518784e-05, "loss": 0.6315, "step": 6329 }, { "epoch": 1.2222436763854025, "grad_norm": 0.6298061609268188, "learning_rate": 3.941439975956706e-05, "loss": 0.6708, "step": 6330 }, { "epoch": 1.2224367638540259, "grad_norm": 1.9502842426300049, "learning_rate": 3.939793190287154e-05, "loss": 0.5818, "step": 6331 }, { "epoch": 1.2226298513226492, "grad_norm": 0.7810764908790588, "learning_rate": 3.938146525030258e-05, "loss": 0.6219, "step": 6332 }, { "epoch": 1.2228229387912724, "grad_norm": 0.9708845019340515, "learning_rate": 3.936499980373036e-05, "loss": 0.6028, "step": 6333 }, { "epoch": 1.2230160262598957, "grad_norm": 0.8635867238044739, "learning_rate": 3.934853556502495e-05, "loss": 0.6731, "step": 6334 }, { "epoch": 1.223209113728519, "grad_norm": 0.8150858879089355, "learning_rate": 3.9332072536056265e-05, "loss": 0.6223, "step": 6335 }, { "epoch": 1.2234022011971424, "grad_norm": 0.5287158489227295, "learning_rate": 3.931561071869411e-05, "loss": 0.6097, "step": 6336 }, { "epoch": 1.2235952886657655, "grad_norm": 0.80268394947052, "learning_rate": 3.929915011480809e-05, "loss": 0.6031, "step": 6337 }, { "epoch": 1.2237883761343888, "grad_norm": 0.7776824235916138, "learning_rate": 3.928269072626772e-05, "loss": 0.595, "step": 6338 }, { "epoch": 1.2239814636030122, "grad_norm": 1.4698083400726318, "learning_rate": 3.926623255494241e-05, "loss": 0.6309, "step": 6339 }, { "epoch": 1.2241745510716355, "grad_norm": 1.2214065790176392, "learning_rate": 3.924977560270135e-05, "loss": 0.5628, "step": 6340 }, { "epoch": 1.2243676385402587, "grad_norm": 0.7465751767158508, "learning_rate": 3.923331987141365e-05, "loss": 0.6728, "step": 6341 }, { "epoch": 1.224560726008882, "grad_norm": 0.7445945143699646, "learning_rate": 3.9216865362948276e-05, "loss": 0.6135, "step": 6342 }, { "epoch": 1.2247538134775053, "grad_norm": 0.595320463180542, "learning_rate": 3.920041207917403e-05, "loss": 0.6094, "step": 6343 }, { "epoch": 1.2249469009461287, "grad_norm": 0.8322265148162842, "learning_rate": 3.918396002195957e-05, "loss": 0.595, "step": 6344 }, { "epoch": 1.2251399884147518, "grad_norm": 0.8020801544189453, "learning_rate": 3.9167509193173465e-05, "loss": 0.6226, "step": 6345 }, { "epoch": 1.2253330758833751, "grad_norm": 0.6837173104286194, "learning_rate": 3.91510595946841e-05, "loss": 0.6684, "step": 6346 }, { "epoch": 1.2255261633519985, "grad_norm": 0.8104124665260315, "learning_rate": 3.9134611228359744e-05, "loss": 0.5564, "step": 6347 }, { "epoch": 1.2257192508206218, "grad_norm": 0.7899025678634644, "learning_rate": 3.911816409606849e-05, "loss": 0.6281, "step": 6348 }, { "epoch": 1.225912338289245, "grad_norm": 3.848334312438965, "learning_rate": 3.9101718199678315e-05, "loss": 0.6684, "step": 6349 }, { "epoch": 1.2261054257578683, "grad_norm": 0.9655457735061646, "learning_rate": 3.90852735410571e-05, "loss": 0.6589, "step": 6350 }, { "epoch": 1.2262985132264916, "grad_norm": 1.1074053049087524, "learning_rate": 3.9068830122072494e-05, "loss": 0.5714, "step": 6351 }, { "epoch": 1.226491600695115, "grad_norm": 1.1721899509429932, "learning_rate": 3.905238794459208e-05, "loss": 0.6375, "step": 6352 }, { "epoch": 1.226684688163738, "grad_norm": 2.0203330516815186, "learning_rate": 3.9035947010483265e-05, "loss": 0.6555, "step": 6353 }, { "epoch": 1.2268777756323614, "grad_norm": 0.6206871271133423, "learning_rate": 3.901950732161333e-05, "loss": 0.6012, "step": 6354 }, { "epoch": 1.2270708631009848, "grad_norm": 0.6844637393951416, "learning_rate": 3.9003068879849394e-05, "loss": 0.5544, "step": 6355 }, { "epoch": 1.2272639505696081, "grad_norm": 0.8798202276229858, "learning_rate": 3.898663168705845e-05, "loss": 0.644, "step": 6356 }, { "epoch": 1.2274570380382313, "grad_norm": 1.0027878284454346, "learning_rate": 3.897019574510735e-05, "loss": 0.5827, "step": 6357 }, { "epoch": 1.2276501255068546, "grad_norm": 1.198997974395752, "learning_rate": 3.895376105586281e-05, "loss": 0.6118, "step": 6358 }, { "epoch": 1.227843212975478, "grad_norm": 0.8031777739524841, "learning_rate": 3.8937327621191396e-05, "loss": 0.5864, "step": 6359 }, { "epoch": 1.228036300444101, "grad_norm": 6.347006320953369, "learning_rate": 3.892089544295952e-05, "loss": 0.6125, "step": 6360 }, { "epoch": 1.2282293879127244, "grad_norm": 0.784882128238678, "learning_rate": 3.8904464523033455e-05, "loss": 0.5401, "step": 6361 }, { "epoch": 1.2284224753813477, "grad_norm": 0.6909148693084717, "learning_rate": 3.8888034863279335e-05, "loss": 0.5891, "step": 6362 }, { "epoch": 1.228615562849971, "grad_norm": 0.7959286570549011, "learning_rate": 3.887160646556318e-05, "loss": 0.5891, "step": 6363 }, { "epoch": 1.2288086503185944, "grad_norm": 0.9937381148338318, "learning_rate": 3.885517933175081e-05, "loss": 0.5455, "step": 6364 }, { "epoch": 1.2290017377872176, "grad_norm": 0.7978318929672241, "learning_rate": 3.883875346370798e-05, "loss": 0.643, "step": 6365 }, { "epoch": 1.229194825255841, "grad_norm": 0.6124420762062073, "learning_rate": 3.882232886330018e-05, "loss": 0.6219, "step": 6366 }, { "epoch": 1.2293879127244642, "grad_norm": 1.246001124382019, "learning_rate": 3.88059055323929e-05, "loss": 0.5207, "step": 6367 }, { "epoch": 1.2295810001930874, "grad_norm": 0.5692791938781738, "learning_rate": 3.878948347285135e-05, "loss": 0.6266, "step": 6368 }, { "epoch": 1.2297740876617107, "grad_norm": 1.1798043251037598, "learning_rate": 3.87730626865407e-05, "loss": 0.6617, "step": 6369 }, { "epoch": 1.229967175130334, "grad_norm": 0.8422982692718506, "learning_rate": 3.875664317532595e-05, "loss": 0.6266, "step": 6370 }, { "epoch": 1.2301602625989574, "grad_norm": 0.6737861037254333, "learning_rate": 3.8740224941071905e-05, "loss": 0.5935, "step": 6371 }, { "epoch": 1.2303533500675807, "grad_norm": 1.0301532745361328, "learning_rate": 3.8723807985643265e-05, "loss": 0.5681, "step": 6372 }, { "epoch": 1.2305464375362039, "grad_norm": 1.4726132154464722, "learning_rate": 3.87073923109046e-05, "loss": 0.5878, "step": 6373 }, { "epoch": 1.2307395250048272, "grad_norm": 1.1568827629089355, "learning_rate": 3.86909779187203e-05, "loss": 0.5386, "step": 6374 }, { "epoch": 1.2309326124734505, "grad_norm": 0.885890007019043, "learning_rate": 3.8674564810954635e-05, "loss": 0.5903, "step": 6375 }, { "epoch": 1.2311256999420737, "grad_norm": 0.9949887990951538, "learning_rate": 3.8658152989471726e-05, "loss": 0.6167, "step": 6376 }, { "epoch": 1.231318787410697, "grad_norm": 1.652591586112976, "learning_rate": 3.8641742456135504e-05, "loss": 0.6066, "step": 6377 }, { "epoch": 1.2315118748793203, "grad_norm": 0.861656904220581, "learning_rate": 3.862533321280983e-05, "loss": 0.5951, "step": 6378 }, { "epoch": 1.2317049623479437, "grad_norm": 1.5426568984985352, "learning_rate": 3.8608925261358356e-05, "loss": 0.6203, "step": 6379 }, { "epoch": 1.2318980498165668, "grad_norm": 1.351287603378296, "learning_rate": 3.8592518603644606e-05, "loss": 0.6619, "step": 6380 }, { "epoch": 1.2320911372851902, "grad_norm": 1.2138992547988892, "learning_rate": 3.857611324153199e-05, "loss": 0.552, "step": 6381 }, { "epoch": 1.2322842247538135, "grad_norm": 1.608630657196045, "learning_rate": 3.855970917688373e-05, "loss": 0.5699, "step": 6382 }, { "epoch": 1.2324773122224368, "grad_norm": 2.0166091918945312, "learning_rate": 3.8543306411562884e-05, "loss": 0.6373, "step": 6383 }, { "epoch": 1.23267039969106, "grad_norm": 0.7029943466186523, "learning_rate": 3.8526904947432416e-05, "loss": 0.6231, "step": 6384 }, { "epoch": 1.2328634871596833, "grad_norm": 0.6153780817985535, "learning_rate": 3.851050478635512e-05, "loss": 0.5752, "step": 6385 }, { "epoch": 1.2330565746283066, "grad_norm": 0.7882006168365479, "learning_rate": 3.849410593019363e-05, "loss": 0.583, "step": 6386 }, { "epoch": 1.23324966209693, "grad_norm": 1.0472365617752075, "learning_rate": 3.847770838081045e-05, "loss": 0.6598, "step": 6387 }, { "epoch": 1.233442749565553, "grad_norm": 0.9113544225692749, "learning_rate": 3.846131214006791e-05, "loss": 0.6498, "step": 6388 }, { "epoch": 1.2336358370341765, "grad_norm": 0.9015027284622192, "learning_rate": 3.844491720982824e-05, "loss": 0.5935, "step": 6389 }, { "epoch": 1.2338289245027998, "grad_norm": 0.7770751118659973, "learning_rate": 3.842852359195344e-05, "loss": 0.6212, "step": 6390 }, { "epoch": 1.2340220119714231, "grad_norm": 1.0122816562652588, "learning_rate": 3.841213128830544e-05, "loss": 0.5757, "step": 6391 }, { "epoch": 1.2342150994400463, "grad_norm": 0.6795004606246948, "learning_rate": 3.8395740300746006e-05, "loss": 0.5434, "step": 6392 }, { "epoch": 1.2344081869086696, "grad_norm": 1.0043768882751465, "learning_rate": 3.837935063113672e-05, "loss": 0.6711, "step": 6393 }, { "epoch": 1.234601274377293, "grad_norm": 0.8990087509155273, "learning_rate": 3.836296228133903e-05, "loss": 0.6547, "step": 6394 }, { "epoch": 1.2347943618459163, "grad_norm": 1.2866383790969849, "learning_rate": 3.8346575253214226e-05, "loss": 0.5992, "step": 6395 }, { "epoch": 1.2349874493145394, "grad_norm": 1.0192114114761353, "learning_rate": 3.8330189548623495e-05, "loss": 0.6244, "step": 6396 }, { "epoch": 1.2351805367831628, "grad_norm": 1.6360749006271362, "learning_rate": 3.831380516942781e-05, "loss": 0.6696, "step": 6397 }, { "epoch": 1.235373624251786, "grad_norm": 1.3214040994644165, "learning_rate": 3.829742211748804e-05, "loss": 0.659, "step": 6398 }, { "epoch": 1.2355667117204094, "grad_norm": 0.8141209483146667, "learning_rate": 3.828104039466488e-05, "loss": 0.6, "step": 6399 }, { "epoch": 1.2357597991890326, "grad_norm": 1.7180947065353394, "learning_rate": 3.8264660002818875e-05, "loss": 0.6215, "step": 6400 }, { "epoch": 1.235952886657656, "grad_norm": 0.6276348233222961, "learning_rate": 3.824828094381041e-05, "loss": 0.5735, "step": 6401 }, { "epoch": 1.2361459741262792, "grad_norm": 0.8490728735923767, "learning_rate": 3.8231903219499756e-05, "loss": 0.5898, "step": 6402 }, { "epoch": 1.2363390615949026, "grad_norm": 1.2973191738128662, "learning_rate": 3.821552683174698e-05, "loss": 0.5552, "step": 6403 }, { "epoch": 1.2365321490635257, "grad_norm": 0.7889989018440247, "learning_rate": 3.819915178241205e-05, "loss": 0.5622, "step": 6404 }, { "epoch": 1.236725236532149, "grad_norm": 0.8210431337356567, "learning_rate": 3.8182778073354764e-05, "loss": 0.6672, "step": 6405 }, { "epoch": 1.2369183240007724, "grad_norm": 0.9983682036399841, "learning_rate": 3.8166405706434736e-05, "loss": 0.5867, "step": 6406 }, { "epoch": 1.2371114114693955, "grad_norm": 1.0315449237823486, "learning_rate": 3.815003468351145e-05, "loss": 0.6239, "step": 6407 }, { "epoch": 1.2373044989380189, "grad_norm": 1.2195079326629639, "learning_rate": 3.8133665006444255e-05, "loss": 0.6903, "step": 6408 }, { "epoch": 1.2374975864066422, "grad_norm": 0.920066237449646, "learning_rate": 3.811729667709234e-05, "loss": 0.6262, "step": 6409 }, { "epoch": 1.2376906738752655, "grad_norm": 0.9522813558578491, "learning_rate": 3.8100929697314736e-05, "loss": 0.6167, "step": 6410 }, { "epoch": 1.2378837613438889, "grad_norm": 0.7929341197013855, "learning_rate": 3.8084564068970274e-05, "loss": 0.63, "step": 6411 }, { "epoch": 1.238076848812512, "grad_norm": 4.831954479217529, "learning_rate": 3.8068199793917706e-05, "loss": 0.5845, "step": 6412 }, { "epoch": 1.2382699362811354, "grad_norm": 0.807424008846283, "learning_rate": 3.8051836874015614e-05, "loss": 0.6076, "step": 6413 }, { "epoch": 1.2384630237497587, "grad_norm": 0.7054309248924255, "learning_rate": 3.8035475311122374e-05, "loss": 0.5946, "step": 6414 }, { "epoch": 1.2386561112183818, "grad_norm": 0.7765781283378601, "learning_rate": 3.801911510709627e-05, "loss": 0.6323, "step": 6415 }, { "epoch": 1.2388491986870052, "grad_norm": 1.0622285604476929, "learning_rate": 3.800275626379543e-05, "loss": 0.6248, "step": 6416 }, { "epoch": 1.2390422861556285, "grad_norm": 0.9944087862968445, "learning_rate": 3.7986398783077766e-05, "loss": 0.5409, "step": 6417 }, { "epoch": 1.2392353736242518, "grad_norm": 0.7748992443084717, "learning_rate": 3.797004266680107e-05, "loss": 0.5616, "step": 6418 }, { "epoch": 1.2394284610928752, "grad_norm": 0.8188245892524719, "learning_rate": 3.7953687916823e-05, "loss": 0.6072, "step": 6419 }, { "epoch": 1.2396215485614983, "grad_norm": 3.374345064163208, "learning_rate": 3.793733453500106e-05, "loss": 0.5952, "step": 6420 }, { "epoch": 1.2398146360301217, "grad_norm": 0.8647533059120178, "learning_rate": 3.792098252319254e-05, "loss": 0.6683, "step": 6421 }, { "epoch": 1.240007723498745, "grad_norm": 1.2057172060012817, "learning_rate": 3.790463188325465e-05, "loss": 0.5965, "step": 6422 }, { "epoch": 1.2402008109673681, "grad_norm": 0.9517672657966614, "learning_rate": 3.7888282617044385e-05, "loss": 0.6513, "step": 6423 }, { "epoch": 1.2403938984359915, "grad_norm": 0.8767876029014587, "learning_rate": 3.7871934726418626e-05, "loss": 0.5948, "step": 6424 }, { "epoch": 1.2405869859046148, "grad_norm": 1.2403744459152222, "learning_rate": 3.785558821323406e-05, "loss": 0.6373, "step": 6425 }, { "epoch": 1.2407800733732381, "grad_norm": 2.490623712539673, "learning_rate": 3.7839243079347234e-05, "loss": 0.6651, "step": 6426 }, { "epoch": 1.2409731608418613, "grad_norm": 1.9248528480529785, "learning_rate": 3.782289932661458e-05, "loss": 0.5978, "step": 6427 }, { "epoch": 1.2411662483104846, "grad_norm": 0.995022714138031, "learning_rate": 3.780655695689231e-05, "loss": 0.6119, "step": 6428 }, { "epoch": 1.241359335779108, "grad_norm": 1.002030849456787, "learning_rate": 3.779021597203649e-05, "loss": 0.607, "step": 6429 }, { "epoch": 1.2415524232477313, "grad_norm": 0.963371753692627, "learning_rate": 3.777387637390305e-05, "loss": 0.6337, "step": 6430 }, { "epoch": 1.2417455107163544, "grad_norm": 0.9194292426109314, "learning_rate": 3.7757538164347766e-05, "loss": 0.6096, "step": 6431 }, { "epoch": 1.2419385981849778, "grad_norm": 0.9794220924377441, "learning_rate": 3.7741201345226235e-05, "loss": 0.5849, "step": 6432 }, { "epoch": 1.242131685653601, "grad_norm": 1.8566205501556396, "learning_rate": 3.772486591839393e-05, "loss": 0.5688, "step": 6433 }, { "epoch": 1.2423247731222244, "grad_norm": 0.8828051686286926, "learning_rate": 3.770853188570609e-05, "loss": 0.5822, "step": 6434 }, { "epoch": 1.2425178605908476, "grad_norm": 0.9311500191688538, "learning_rate": 3.7692199249017904e-05, "loss": 0.5586, "step": 6435 }, { "epoch": 1.242710948059471, "grad_norm": 0.9774441123008728, "learning_rate": 3.7675868010184306e-05, "loss": 0.5758, "step": 6436 }, { "epoch": 1.2429040355280943, "grad_norm": 1.155746340751648, "learning_rate": 3.765953817106014e-05, "loss": 0.5811, "step": 6437 }, { "epoch": 1.2430971229967176, "grad_norm": 0.8906496167182922, "learning_rate": 3.764320973350003e-05, "loss": 0.5993, "step": 6438 }, { "epoch": 1.2432902104653407, "grad_norm": 0.9592586755752563, "learning_rate": 3.762688269935852e-05, "loss": 0.5532, "step": 6439 }, { "epoch": 1.243483297933964, "grad_norm": 2.221245527267456, "learning_rate": 3.7610557070489897e-05, "loss": 0.5683, "step": 6440 }, { "epoch": 1.2436763854025874, "grad_norm": 1.0140191316604614, "learning_rate": 3.759423284874837e-05, "loss": 0.5718, "step": 6441 }, { "epoch": 1.2438694728712107, "grad_norm": 1.052316665649414, "learning_rate": 3.757791003598795e-05, "loss": 0.5973, "step": 6442 }, { "epoch": 1.2440625603398339, "grad_norm": 0.8448288440704346, "learning_rate": 3.756158863406248e-05, "loss": 0.594, "step": 6443 }, { "epoch": 1.2442556478084572, "grad_norm": 0.7501600384712219, "learning_rate": 3.754526864482569e-05, "loss": 0.5544, "step": 6444 }, { "epoch": 1.2444487352770806, "grad_norm": 1.222733974456787, "learning_rate": 3.752895007013111e-05, "loss": 0.6159, "step": 6445 }, { "epoch": 1.244641822745704, "grad_norm": 1.3125250339508057, "learning_rate": 3.751263291183209e-05, "loss": 0.6512, "step": 6446 }, { "epoch": 1.244834910214327, "grad_norm": 0.8771355748176575, "learning_rate": 3.749631717178186e-05, "loss": 0.6239, "step": 6447 }, { "epoch": 1.2450279976829504, "grad_norm": 0.8483119010925293, "learning_rate": 3.7480002851833495e-05, "loss": 0.6235, "step": 6448 }, { "epoch": 1.2452210851515737, "grad_norm": 1.045129656791687, "learning_rate": 3.746368995383985e-05, "loss": 0.6573, "step": 6449 }, { "epoch": 1.245414172620197, "grad_norm": 2.122741222381592, "learning_rate": 3.7447378479653714e-05, "loss": 0.5999, "step": 6450 }, { "epoch": 1.2456072600888202, "grad_norm": 0.7186049222946167, "learning_rate": 3.7431068431127594e-05, "loss": 0.6088, "step": 6451 }, { "epoch": 1.2458003475574435, "grad_norm": 1.9068646430969238, "learning_rate": 3.741475981011394e-05, "loss": 0.6396, "step": 6452 }, { "epoch": 1.2459934350260669, "grad_norm": 0.8973543643951416, "learning_rate": 3.739845261846498e-05, "loss": 0.6109, "step": 6453 }, { "epoch": 1.24618652249469, "grad_norm": 2.8105621337890625, "learning_rate": 3.73821468580328e-05, "loss": 0.6387, "step": 6454 }, { "epoch": 1.2463796099633133, "grad_norm": 1.2069169282913208, "learning_rate": 3.7365842530669335e-05, "loss": 0.6512, "step": 6455 }, { "epoch": 1.2465726974319367, "grad_norm": 2.633355140686035, "learning_rate": 3.7349539638226355e-05, "loss": 0.615, "step": 6456 }, { "epoch": 1.24676578490056, "grad_norm": 1.0069910287857056, "learning_rate": 3.733323818255541e-05, "loss": 0.6548, "step": 6457 }, { "epoch": 1.2469588723691833, "grad_norm": 0.7856995463371277, "learning_rate": 3.731693816550795e-05, "loss": 0.5436, "step": 6458 }, { "epoch": 1.2471519598378065, "grad_norm": 0.9987779259681702, "learning_rate": 3.730063958893528e-05, "loss": 0.6061, "step": 6459 }, { "epoch": 1.2473450473064298, "grad_norm": 1.0515989065170288, "learning_rate": 3.728434245468846e-05, "loss": 0.5884, "step": 6460 }, { "epoch": 1.2475381347750532, "grad_norm": 1.5532732009887695, "learning_rate": 3.7268046764618446e-05, "loss": 0.5996, "step": 6461 }, { "epoch": 1.2477312222436763, "grad_norm": 2.0052294731140137, "learning_rate": 3.7251752520576044e-05, "loss": 0.6292, "step": 6462 }, { "epoch": 1.2479243097122996, "grad_norm": 1.174452781677246, "learning_rate": 3.723545972441184e-05, "loss": 0.6041, "step": 6463 }, { "epoch": 1.248117397180923, "grad_norm": 1.3633073568344116, "learning_rate": 3.721916837797627e-05, "loss": 0.6303, "step": 6464 }, { "epoch": 1.2483104846495463, "grad_norm": 1.323237657546997, "learning_rate": 3.7202878483119643e-05, "loss": 0.6554, "step": 6465 }, { "epoch": 1.2485035721181696, "grad_norm": 1.0696518421173096, "learning_rate": 3.718659004169207e-05, "loss": 0.6421, "step": 6466 }, { "epoch": 1.2486966595867928, "grad_norm": 1.1077123880386353, "learning_rate": 3.717030305554351e-05, "loss": 0.6547, "step": 6467 }, { "epoch": 1.248889747055416, "grad_norm": 1.6185439825057983, "learning_rate": 3.715401752652377e-05, "loss": 0.5477, "step": 6468 }, { "epoch": 1.2490828345240395, "grad_norm": 0.9236552119255066, "learning_rate": 3.713773345648242e-05, "loss": 0.6594, "step": 6469 }, { "epoch": 1.2492759219926626, "grad_norm": 1.2723076343536377, "learning_rate": 3.712145084726897e-05, "loss": 0.6164, "step": 6470 }, { "epoch": 1.249469009461286, "grad_norm": 0.8717854619026184, "learning_rate": 3.710516970073268e-05, "loss": 0.6234, "step": 6471 }, { "epoch": 1.2496620969299093, "grad_norm": 0.8486197590827942, "learning_rate": 3.70888900187227e-05, "loss": 0.5482, "step": 6472 }, { "epoch": 1.2498551843985326, "grad_norm": 1.9802266359329224, "learning_rate": 3.707261180308799e-05, "loss": 0.6517, "step": 6473 }, { "epoch": 1.250048271867156, "grad_norm": 0.8888166546821594, "learning_rate": 3.705633505567732e-05, "loss": 0.63, "step": 6474 }, { "epoch": 1.250241359335779, "grad_norm": 0.8228138089179993, "learning_rate": 3.7040059778339327e-05, "loss": 0.6702, "step": 6475 }, { "epoch": 1.2504344468044024, "grad_norm": 0.7038650512695312, "learning_rate": 3.702378597292246e-05, "loss": 0.6333, "step": 6476 }, { "epoch": 1.2506275342730258, "grad_norm": 0.766716480255127, "learning_rate": 3.700751364127505e-05, "loss": 0.6239, "step": 6477 }, { "epoch": 1.2508206217416489, "grad_norm": 0.9170817136764526, "learning_rate": 3.699124278524518e-05, "loss": 0.5884, "step": 6478 }, { "epoch": 1.2510137092102722, "grad_norm": 0.8584294319152832, "learning_rate": 3.6974973406680844e-05, "loss": 0.645, "step": 6479 }, { "epoch": 1.2512067966788956, "grad_norm": 0.998694896697998, "learning_rate": 3.6958705507429794e-05, "loss": 0.5397, "step": 6480 }, { "epoch": 1.251399884147519, "grad_norm": 1.041566252708435, "learning_rate": 3.6942439089339676e-05, "loss": 0.6164, "step": 6481 }, { "epoch": 1.2515929716161422, "grad_norm": 0.861096978187561, "learning_rate": 3.692617415425793e-05, "loss": 0.533, "step": 6482 }, { "epoch": 1.2517860590847654, "grad_norm": 0.9948549270629883, "learning_rate": 3.690991070403185e-05, "loss": 0.6777, "step": 6483 }, { "epoch": 1.2519791465533887, "grad_norm": 1.1050249338150024, "learning_rate": 3.689364874050854e-05, "loss": 0.6505, "step": 6484 }, { "epoch": 1.252172234022012, "grad_norm": 0.7458831071853638, "learning_rate": 3.687738826553498e-05, "loss": 0.6306, "step": 6485 }, { "epoch": 1.2523653214906352, "grad_norm": 1.0899970531463623, "learning_rate": 3.68611292809579e-05, "loss": 0.6044, "step": 6486 }, { "epoch": 1.2525584089592585, "grad_norm": 1.1527249813079834, "learning_rate": 3.6844871788623945e-05, "loss": 0.6569, "step": 6487 }, { "epoch": 1.2527514964278819, "grad_norm": 1.3733443021774292, "learning_rate": 3.682861579037954e-05, "loss": 0.6297, "step": 6488 }, { "epoch": 1.2529445838965052, "grad_norm": 2.1123573780059814, "learning_rate": 3.681236128807095e-05, "loss": 0.6682, "step": 6489 }, { "epoch": 1.2531376713651283, "grad_norm": 0.9540761709213257, "learning_rate": 3.679610828354429e-05, "loss": 0.6344, "step": 6490 }, { "epoch": 1.2533307588337517, "grad_norm": 1.3169338703155518, "learning_rate": 3.67798567786455e-05, "loss": 0.6308, "step": 6491 }, { "epoch": 1.253523846302375, "grad_norm": 0.8736795783042908, "learning_rate": 3.676360677522031e-05, "loss": 0.5924, "step": 6492 }, { "epoch": 1.2537169337709981, "grad_norm": 1.1128069162368774, "learning_rate": 3.674735827511432e-05, "loss": 0.5779, "step": 6493 }, { "epoch": 1.2539100212396215, "grad_norm": 1.4609827995300293, "learning_rate": 3.673111128017295e-05, "loss": 0.5795, "step": 6494 }, { "epoch": 1.2541031087082448, "grad_norm": 0.9415249228477478, "learning_rate": 3.671486579224144e-05, "loss": 0.577, "step": 6495 }, { "epoch": 1.2542961961768682, "grad_norm": 0.9708837866783142, "learning_rate": 3.66986218131649e-05, "loss": 0.6301, "step": 6496 }, { "epoch": 1.2544892836454915, "grad_norm": 1.6382590532302856, "learning_rate": 3.6682379344788185e-05, "loss": 0.6231, "step": 6497 }, { "epoch": 1.2546823711141146, "grad_norm": 1.0234400033950806, "learning_rate": 3.666613838895606e-05, "loss": 0.6633, "step": 6498 }, { "epoch": 1.254875458582738, "grad_norm": 1.2348493337631226, "learning_rate": 3.664989894751307e-05, "loss": 0.5919, "step": 6499 }, { "epoch": 1.2550685460513613, "grad_norm": 0.7841493487358093, "learning_rate": 3.663366102230361e-05, "loss": 0.6538, "step": 6500 }, { "epoch": 1.2550685460513613, "eval_loss": 0.6631048917770386, "eval_runtime": 50.8837, "eval_samples_per_second": 13.049, "eval_steps_per_second": 0.413, "step": 6500 }, { "epoch": 1.2552616335199844, "grad_norm": 0.7674158215522766, "learning_rate": 3.661742461517191e-05, "loss": 0.5762, "step": 6501 }, { "epoch": 1.2554547209886078, "grad_norm": 0.8659891486167908, "learning_rate": 3.660118972796202e-05, "loss": 0.6153, "step": 6502 }, { "epoch": 1.2556478084572311, "grad_norm": 1.3435041904449463, "learning_rate": 3.658495636251778e-05, "loss": 0.6193, "step": 6503 }, { "epoch": 1.2558408959258545, "grad_norm": 9.238954544067383, "learning_rate": 3.6568724520682896e-05, "loss": 0.5498, "step": 6504 }, { "epoch": 1.2560339833944778, "grad_norm": 1.0330891609191895, "learning_rate": 3.6552494204300914e-05, "loss": 0.6064, "step": 6505 }, { "epoch": 1.256227070863101, "grad_norm": 0.9068989753723145, "learning_rate": 3.6536265415215173e-05, "loss": 0.6157, "step": 6506 }, { "epoch": 1.2564201583317243, "grad_norm": 1.2225698232650757, "learning_rate": 3.652003815526886e-05, "loss": 0.6224, "step": 6507 }, { "epoch": 1.2566132458003476, "grad_norm": 0.8071541786193848, "learning_rate": 3.650381242630499e-05, "loss": 0.6067, "step": 6508 }, { "epoch": 1.2568063332689707, "grad_norm": 0.8536320924758911, "learning_rate": 3.6487588230166384e-05, "loss": 0.618, "step": 6509 }, { "epoch": 1.256999420737594, "grad_norm": 0.8928225040435791, "learning_rate": 3.647136556869568e-05, "loss": 0.599, "step": 6510 }, { "epoch": 1.2571925082062174, "grad_norm": 1.348930835723877, "learning_rate": 3.645514444373538e-05, "loss": 0.5931, "step": 6511 }, { "epoch": 1.2573855956748408, "grad_norm": 1.3849549293518066, "learning_rate": 3.643892485712781e-05, "loss": 0.6061, "step": 6512 }, { "epoch": 1.257578683143464, "grad_norm": 1.0784705877304077, "learning_rate": 3.6422706810715104e-05, "loss": 0.6052, "step": 6513 }, { "epoch": 1.2577717706120872, "grad_norm": 1.0440630912780762, "learning_rate": 3.640649030633917e-05, "loss": 0.5879, "step": 6514 }, { "epoch": 1.2579648580807106, "grad_norm": 0.7626563906669617, "learning_rate": 3.639027534584184e-05, "loss": 0.5692, "step": 6515 }, { "epoch": 1.258157945549334, "grad_norm": 0.7403086423873901, "learning_rate": 3.637406193106472e-05, "loss": 0.6174, "step": 6516 }, { "epoch": 1.258351033017957, "grad_norm": 3.0044331550598145, "learning_rate": 3.635785006384923e-05, "loss": 0.6307, "step": 6517 }, { "epoch": 1.2585441204865804, "grad_norm": 0.6162208914756775, "learning_rate": 3.6341639746036626e-05, "loss": 0.5278, "step": 6518 }, { "epoch": 1.2587372079552037, "grad_norm": 0.9862024188041687, "learning_rate": 3.632543097946802e-05, "loss": 0.5945, "step": 6519 }, { "epoch": 1.258930295423827, "grad_norm": 1.6101042032241821, "learning_rate": 3.630922376598428e-05, "loss": 0.5924, "step": 6520 }, { "epoch": 1.2591233828924504, "grad_norm": 0.733856737613678, "learning_rate": 3.629301810742614e-05, "loss": 0.6115, "step": 6521 }, { "epoch": 1.2593164703610735, "grad_norm": 1.2502723932266235, "learning_rate": 3.627681400563416e-05, "loss": 0.5988, "step": 6522 }, { "epoch": 1.2595095578296969, "grad_norm": 0.9953559041023254, "learning_rate": 3.626061146244874e-05, "loss": 0.5897, "step": 6523 }, { "epoch": 1.2597026452983202, "grad_norm": 0.9359687566757202, "learning_rate": 3.624441047971003e-05, "loss": 0.6191, "step": 6524 }, { "epoch": 1.2598957327669433, "grad_norm": 1.3652355670928955, "learning_rate": 3.622821105925811e-05, "loss": 0.5943, "step": 6525 }, { "epoch": 1.2600888202355667, "grad_norm": 0.8411758542060852, "learning_rate": 3.6212013202932784e-05, "loss": 0.6582, "step": 6526 }, { "epoch": 1.26028190770419, "grad_norm": 0.9110528230667114, "learning_rate": 3.619581691257371e-05, "loss": 0.6354, "step": 6527 }, { "epoch": 1.2604749951728134, "grad_norm": 1.0825247764587402, "learning_rate": 3.617962219002039e-05, "loss": 0.6606, "step": 6528 }, { "epoch": 1.2606680826414367, "grad_norm": 0.6729860305786133, "learning_rate": 3.616342903711215e-05, "loss": 0.5848, "step": 6529 }, { "epoch": 1.2608611701100598, "grad_norm": 0.9453240633010864, "learning_rate": 3.61472374556881e-05, "loss": 0.5548, "step": 6530 }, { "epoch": 1.2610542575786832, "grad_norm": 0.8896203637123108, "learning_rate": 3.6131047447587224e-05, "loss": 0.6183, "step": 6531 }, { "epoch": 1.2612473450473065, "grad_norm": 2.0079164505004883, "learning_rate": 3.611485901464826e-05, "loss": 0.5841, "step": 6532 }, { "epoch": 1.2614404325159296, "grad_norm": 0.8218279480934143, "learning_rate": 3.609867215870982e-05, "loss": 0.5652, "step": 6533 }, { "epoch": 1.261633519984553, "grad_norm": 1.3897143602371216, "learning_rate": 3.6082486881610324e-05, "loss": 0.6248, "step": 6534 }, { "epoch": 1.2618266074531763, "grad_norm": 0.8364444971084595, "learning_rate": 3.6066303185188005e-05, "loss": 0.6326, "step": 6535 }, { "epoch": 1.2620196949217997, "grad_norm": 1.02196204662323, "learning_rate": 3.605012107128094e-05, "loss": 0.5666, "step": 6536 }, { "epoch": 1.2622127823904228, "grad_norm": 0.9117765426635742, "learning_rate": 3.6033940541726986e-05, "loss": 0.6414, "step": 6537 }, { "epoch": 1.2624058698590461, "grad_norm": 1.2764605283737183, "learning_rate": 3.601776159836383e-05, "loss": 0.6425, "step": 6538 }, { "epoch": 1.2625989573276695, "grad_norm": 1.4104573726654053, "learning_rate": 3.600158424302902e-05, "loss": 0.6273, "step": 6539 }, { "epoch": 1.2627920447962926, "grad_norm": 0.891403079032898, "learning_rate": 3.5985408477559884e-05, "loss": 0.6431, "step": 6540 }, { "epoch": 1.262985132264916, "grad_norm": 0.6646254062652588, "learning_rate": 3.5969234303793565e-05, "loss": 0.5491, "step": 6541 }, { "epoch": 1.2631782197335393, "grad_norm": 1.250159502029419, "learning_rate": 3.595306172356708e-05, "loss": 0.7362, "step": 6542 }, { "epoch": 1.2633713072021626, "grad_norm": 0.7961552739143372, "learning_rate": 3.593689073871716e-05, "loss": 0.591, "step": 6543 }, { "epoch": 1.263564394670786, "grad_norm": 1.1091288328170776, "learning_rate": 3.592072135108048e-05, "loss": 0.6228, "step": 6544 }, { "epoch": 1.263757482139409, "grad_norm": 0.7260914444923401, "learning_rate": 3.590455356249344e-05, "loss": 0.6217, "step": 6545 }, { "epoch": 1.2639505696080324, "grad_norm": 0.8211521506309509, "learning_rate": 3.588838737479229e-05, "loss": 0.6506, "step": 6546 }, { "epoch": 1.2641436570766558, "grad_norm": 2.632300853729248, "learning_rate": 3.587222278981313e-05, "loss": 0.5824, "step": 6547 }, { "epoch": 1.264336744545279, "grad_norm": 1.3089618682861328, "learning_rate": 3.585605980939183e-05, "loss": 0.6007, "step": 6548 }, { "epoch": 1.2645298320139022, "grad_norm": 2.348975419998169, "learning_rate": 3.583989843536408e-05, "loss": 0.5822, "step": 6549 }, { "epoch": 1.2647229194825256, "grad_norm": 0.5436274409294128, "learning_rate": 3.5823738669565414e-05, "loss": 0.581, "step": 6550 }, { "epoch": 1.264916006951149, "grad_norm": 1.2507035732269287, "learning_rate": 3.580758051383118e-05, "loss": 0.6228, "step": 6551 }, { "epoch": 1.2651090944197723, "grad_norm": 1.0725446939468384, "learning_rate": 3.579142396999653e-05, "loss": 0.5515, "step": 6552 }, { "epoch": 1.2653021818883954, "grad_norm": 0.8452393412590027, "learning_rate": 3.577526903989643e-05, "loss": 0.6278, "step": 6553 }, { "epoch": 1.2654952693570187, "grad_norm": 1.6071661710739136, "learning_rate": 3.5759115725365706e-05, "loss": 0.5569, "step": 6554 }, { "epoch": 1.265688356825642, "grad_norm": 2.1964213848114014, "learning_rate": 3.574296402823892e-05, "loss": 0.6524, "step": 6555 }, { "epoch": 1.2658814442942652, "grad_norm": 0.9086995720863342, "learning_rate": 3.572681395035051e-05, "loss": 0.6199, "step": 6556 }, { "epoch": 1.2660745317628885, "grad_norm": 0.7446884512901306, "learning_rate": 3.5710665493534725e-05, "loss": 0.5569, "step": 6557 }, { "epoch": 1.2662676192315119, "grad_norm": 0.9142442345619202, "learning_rate": 3.5694518659625625e-05, "loss": 0.6293, "step": 6558 }, { "epoch": 1.2664607067001352, "grad_norm": 1.0428810119628906, "learning_rate": 3.567837345045708e-05, "loss": 0.5932, "step": 6559 }, { "epoch": 1.2666537941687586, "grad_norm": 0.7398931980133057, "learning_rate": 3.566222986786276e-05, "loss": 0.6069, "step": 6560 }, { "epoch": 1.2668468816373817, "grad_norm": 1.0937758684158325, "learning_rate": 3.5646087913676167e-05, "loss": 0.5507, "step": 6561 }, { "epoch": 1.267039969106005, "grad_norm": 0.6499742269515991, "learning_rate": 3.562994758973065e-05, "loss": 0.5592, "step": 6562 }, { "epoch": 1.2672330565746284, "grad_norm": 1.0542457103729248, "learning_rate": 3.5613808897859305e-05, "loss": 0.6869, "step": 6563 }, { "epoch": 1.2674261440432515, "grad_norm": 1.2423264980316162, "learning_rate": 3.5597671839895096e-05, "loss": 0.6074, "step": 6564 }, { "epoch": 1.2676192315118748, "grad_norm": 1.3735185861587524, "learning_rate": 3.558153641767079e-05, "loss": 0.6221, "step": 6565 }, { "epoch": 1.2678123189804982, "grad_norm": 1.5294127464294434, "learning_rate": 3.556540263301896e-05, "loss": 0.6447, "step": 6566 }, { "epoch": 1.2680054064491215, "grad_norm": 0.9347058534622192, "learning_rate": 3.554927048777198e-05, "loss": 0.5671, "step": 6567 }, { "epoch": 1.2681984939177449, "grad_norm": 0.8494552373886108, "learning_rate": 3.553313998376206e-05, "loss": 0.5941, "step": 6568 }, { "epoch": 1.268391581386368, "grad_norm": 1.0547924041748047, "learning_rate": 3.551701112282123e-05, "loss": 0.6647, "step": 6569 }, { "epoch": 1.2685846688549913, "grad_norm": 1.0770927667617798, "learning_rate": 3.55008839067813e-05, "loss": 0.6126, "step": 6570 }, { "epoch": 1.2687777563236147, "grad_norm": 1.2833172082901, "learning_rate": 3.548475833747395e-05, "loss": 0.6166, "step": 6571 }, { "epoch": 1.2689708437922378, "grad_norm": 0.7537060976028442, "learning_rate": 3.546863441673061e-05, "loss": 0.5879, "step": 6572 }, { "epoch": 1.2691639312608611, "grad_norm": 0.8643835186958313, "learning_rate": 3.545251214638252e-05, "loss": 0.603, "step": 6573 }, { "epoch": 1.2693570187294845, "grad_norm": 0.7092623710632324, "learning_rate": 3.5436391528260806e-05, "loss": 0.6407, "step": 6574 }, { "epoch": 1.2695501061981078, "grad_norm": 0.7162409424781799, "learning_rate": 3.5420272564196354e-05, "loss": 0.6915, "step": 6575 }, { "epoch": 1.2697431936667312, "grad_norm": 0.6796663403511047, "learning_rate": 3.5404155256019875e-05, "loss": 0.5509, "step": 6576 }, { "epoch": 1.2699362811353543, "grad_norm": 0.8334065675735474, "learning_rate": 3.538803960556186e-05, "loss": 0.6445, "step": 6577 }, { "epoch": 1.2701293686039776, "grad_norm": 1.0350592136383057, "learning_rate": 3.537192561465265e-05, "loss": 0.6234, "step": 6578 }, { "epoch": 1.270322456072601, "grad_norm": 1.5738106966018677, "learning_rate": 3.535581328512241e-05, "loss": 0.566, "step": 6579 }, { "epoch": 1.270515543541224, "grad_norm": 0.7379257082939148, "learning_rate": 3.533970261880106e-05, "loss": 0.6307, "step": 6580 }, { "epoch": 1.2707086310098474, "grad_norm": 0.7711265087127686, "learning_rate": 3.532359361751838e-05, "loss": 0.6105, "step": 6581 }, { "epoch": 1.2709017184784708, "grad_norm": 0.9347100853919983, "learning_rate": 3.5307486283103966e-05, "loss": 0.6188, "step": 6582 }, { "epoch": 1.2710948059470941, "grad_norm": 4.354243278503418, "learning_rate": 3.529138061738717e-05, "loss": 0.5821, "step": 6583 }, { "epoch": 1.2712878934157172, "grad_norm": 0.7072728276252747, "learning_rate": 3.5275276622197184e-05, "loss": 0.5609, "step": 6584 }, { "epoch": 1.2714809808843406, "grad_norm": 1.993704915046692, "learning_rate": 3.525917429936304e-05, "loss": 0.6154, "step": 6585 }, { "epoch": 1.271674068352964, "grad_norm": 1.0678396224975586, "learning_rate": 3.524307365071354e-05, "loss": 0.6015, "step": 6586 }, { "epoch": 1.271867155821587, "grad_norm": 0.687320351600647, "learning_rate": 3.522697467807732e-05, "loss": 0.5613, "step": 6587 }, { "epoch": 1.2720602432902104, "grad_norm": 0.8069887757301331, "learning_rate": 3.521087738328282e-05, "loss": 0.6673, "step": 6588 }, { "epoch": 1.2722533307588337, "grad_norm": 1.1710951328277588, "learning_rate": 3.519478176815825e-05, "loss": 0.5694, "step": 6589 }, { "epoch": 1.272446418227457, "grad_norm": 0.6618784666061401, "learning_rate": 3.5178687834531703e-05, "loss": 0.6024, "step": 6590 }, { "epoch": 1.2726395056960804, "grad_norm": 0.965945839881897, "learning_rate": 3.516259558423102e-05, "loss": 0.5736, "step": 6591 }, { "epoch": 1.2728325931647035, "grad_norm": 1.4325400590896606, "learning_rate": 3.514650501908389e-05, "loss": 0.6039, "step": 6592 }, { "epoch": 1.2730256806333269, "grad_norm": 1.057395339012146, "learning_rate": 3.513041614091778e-05, "loss": 0.6218, "step": 6593 }, { "epoch": 1.2732187681019502, "grad_norm": 0.7515102624893188, "learning_rate": 3.5114328951560005e-05, "loss": 0.6125, "step": 6594 }, { "epoch": 1.2734118555705733, "grad_norm": 0.780408501625061, "learning_rate": 3.5098243452837623e-05, "loss": 0.6555, "step": 6595 }, { "epoch": 1.2736049430391967, "grad_norm": 3.3303656578063965, "learning_rate": 3.508215964657755e-05, "loss": 0.6333, "step": 6596 }, { "epoch": 1.27379803050782, "grad_norm": 51.72332000732422, "learning_rate": 3.506607753460652e-05, "loss": 0.5773, "step": 6597 }, { "epoch": 1.2739911179764434, "grad_norm": 0.8835871815681458, "learning_rate": 3.504999711875104e-05, "loss": 0.6297, "step": 6598 }, { "epoch": 1.2741842054450667, "grad_norm": 0.8796908259391785, "learning_rate": 3.503391840083746e-05, "loss": 0.5537, "step": 6599 }, { "epoch": 1.2743772929136898, "grad_norm": 1.6812329292297363, "learning_rate": 3.501784138269187e-05, "loss": 0.5866, "step": 6600 }, { "epoch": 1.2745703803823132, "grad_norm": 0.9665578007698059, "learning_rate": 3.500176606614025e-05, "loss": 0.6073, "step": 6601 }, { "epoch": 1.2747634678509365, "grad_norm": 1.9042335748672485, "learning_rate": 3.498569245300832e-05, "loss": 0.5557, "step": 6602 }, { "epoch": 1.2749565553195596, "grad_norm": 1.8870186805725098, "learning_rate": 3.496962054512166e-05, "loss": 0.6086, "step": 6603 }, { "epoch": 1.275149642788183, "grad_norm": 0.9744917750358582, "learning_rate": 3.495355034430564e-05, "loss": 0.6022, "step": 6604 }, { "epoch": 1.2753427302568063, "grad_norm": 0.7738450169563293, "learning_rate": 3.493748185238541e-05, "loss": 0.5481, "step": 6605 }, { "epoch": 1.2755358177254297, "grad_norm": 1.5054112672805786, "learning_rate": 3.492141507118594e-05, "loss": 0.5642, "step": 6606 }, { "epoch": 1.275728905194053, "grad_norm": 0.862246572971344, "learning_rate": 3.4905350002532e-05, "loss": 0.6806, "step": 6607 }, { "epoch": 1.2759219926626761, "grad_norm": 0.7876933813095093, "learning_rate": 3.488928664824821e-05, "loss": 0.5994, "step": 6608 }, { "epoch": 1.2761150801312995, "grad_norm": 0.8256751298904419, "learning_rate": 3.4873225010158925e-05, "loss": 0.6179, "step": 6609 }, { "epoch": 1.2763081675999228, "grad_norm": 1.2653132677078247, "learning_rate": 3.485716509008837e-05, "loss": 0.6345, "step": 6610 }, { "epoch": 1.276501255068546, "grad_norm": 0.9197655916213989, "learning_rate": 3.4841106889860536e-05, "loss": 0.5989, "step": 6611 }, { "epoch": 1.2766943425371693, "grad_norm": 0.8071110844612122, "learning_rate": 3.482505041129922e-05, "loss": 0.5982, "step": 6612 }, { "epoch": 1.2768874300057926, "grad_norm": 0.8998839855194092, "learning_rate": 3.480899565622803e-05, "loss": 0.6788, "step": 6613 }, { "epoch": 1.277080517474416, "grad_norm": 1.4039945602416992, "learning_rate": 3.479294262647039e-05, "loss": 0.6347, "step": 6614 }, { "epoch": 1.2772736049430393, "grad_norm": 1.6100833415985107, "learning_rate": 3.477689132384951e-05, "loss": 0.6381, "step": 6615 }, { "epoch": 1.2774666924116624, "grad_norm": 0.6770631670951843, "learning_rate": 3.476084175018841e-05, "loss": 0.6677, "step": 6616 }, { "epoch": 1.2776597798802858, "grad_norm": 0.9710166454315186, "learning_rate": 3.474479390730995e-05, "loss": 0.552, "step": 6617 }, { "epoch": 1.2778528673489091, "grad_norm": 0.8886160850524902, "learning_rate": 3.4728747797036706e-05, "loss": 0.5435, "step": 6618 }, { "epoch": 1.2780459548175322, "grad_norm": 0.7664022445678711, "learning_rate": 3.471270342119113e-05, "loss": 0.5661, "step": 6619 }, { "epoch": 1.2782390422861556, "grad_norm": 2.3891167640686035, "learning_rate": 3.469666078159546e-05, "loss": 0.5669, "step": 6620 }, { "epoch": 1.278432129754779, "grad_norm": 0.9996196031570435, "learning_rate": 3.468061988007174e-05, "loss": 0.6355, "step": 6621 }, { "epoch": 1.2786252172234023, "grad_norm": 0.8778009414672852, "learning_rate": 3.466458071844181e-05, "loss": 0.6167, "step": 6622 }, { "epoch": 1.2788183046920256, "grad_norm": 0.9791578650474548, "learning_rate": 3.464854329852729e-05, "loss": 0.5934, "step": 6623 }, { "epoch": 1.2790113921606487, "grad_norm": 0.7930585145950317, "learning_rate": 3.463250762214965e-05, "loss": 0.5929, "step": 6624 }, { "epoch": 1.279204479629272, "grad_norm": 0.8697580099105835, "learning_rate": 3.461647369113013e-05, "loss": 0.7577, "step": 6625 }, { "epoch": 1.2793975670978954, "grad_norm": 1.2494138479232788, "learning_rate": 3.4600441507289774e-05, "loss": 0.5919, "step": 6626 }, { "epoch": 1.2795906545665185, "grad_norm": 0.6950243711471558, "learning_rate": 3.458441107244942e-05, "loss": 0.6597, "step": 6627 }, { "epoch": 1.279783742035142, "grad_norm": 1.3560791015625, "learning_rate": 3.456838238842977e-05, "loss": 0.6141, "step": 6628 }, { "epoch": 1.2799768295037652, "grad_norm": 0.7340339422225952, "learning_rate": 3.4552355457051235e-05, "loss": 0.5895, "step": 6629 }, { "epoch": 1.2801699169723886, "grad_norm": 1.1579927206039429, "learning_rate": 3.4536330280134055e-05, "loss": 0.5692, "step": 6630 }, { "epoch": 1.2803630044410117, "grad_norm": 0.932417631149292, "learning_rate": 3.452030685949831e-05, "loss": 0.6088, "step": 6631 }, { "epoch": 1.280556091909635, "grad_norm": 0.7593685388565063, "learning_rate": 3.450428519696386e-05, "loss": 0.6379, "step": 6632 }, { "epoch": 1.2807491793782584, "grad_norm": 0.8060951232910156, "learning_rate": 3.448826529435033e-05, "loss": 0.6794, "step": 6633 }, { "epoch": 1.2809422668468815, "grad_norm": 0.7937268018722534, "learning_rate": 3.447224715347723e-05, "loss": 0.6179, "step": 6634 }, { "epoch": 1.2811353543155048, "grad_norm": 0.5886716246604919, "learning_rate": 3.445623077616375e-05, "loss": 0.5991, "step": 6635 }, { "epoch": 1.2813284417841282, "grad_norm": 0.8873651623725891, "learning_rate": 3.4440216164228995e-05, "loss": 0.6244, "step": 6636 }, { "epoch": 1.2815215292527515, "grad_norm": 0.7767665982246399, "learning_rate": 3.442420331949179e-05, "loss": 0.5072, "step": 6637 }, { "epoch": 1.2817146167213749, "grad_norm": 0.9890618324279785, "learning_rate": 3.4408192243770795e-05, "loss": 0.6118, "step": 6638 }, { "epoch": 1.281907704189998, "grad_norm": 0.8350163698196411, "learning_rate": 3.4392182938884485e-05, "loss": 0.5795, "step": 6639 }, { "epoch": 1.2821007916586213, "grad_norm": 0.8155163526535034, "learning_rate": 3.437617540665109e-05, "loss": 0.5893, "step": 6640 }, { "epoch": 1.2822938791272447, "grad_norm": 0.7880723476409912, "learning_rate": 3.436016964888865e-05, "loss": 0.627, "step": 6641 }, { "epoch": 1.2824869665958678, "grad_norm": 0.8164938688278198, "learning_rate": 3.434416566741503e-05, "loss": 0.6166, "step": 6642 }, { "epoch": 1.2826800540644911, "grad_norm": 0.829433262348175, "learning_rate": 3.432816346404789e-05, "loss": 0.6067, "step": 6643 }, { "epoch": 1.2828731415331145, "grad_norm": 0.7420169711112976, "learning_rate": 3.431216304060464e-05, "loss": 0.5721, "step": 6644 }, { "epoch": 1.2830662290017378, "grad_norm": 0.9057928323745728, "learning_rate": 3.429616439890258e-05, "loss": 0.6298, "step": 6645 }, { "epoch": 1.2832593164703612, "grad_norm": 0.7194569706916809, "learning_rate": 3.428016754075868e-05, "loss": 0.6095, "step": 6646 }, { "epoch": 1.2834524039389843, "grad_norm": 0.719963788986206, "learning_rate": 3.4264172467989816e-05, "loss": 0.536, "step": 6647 }, { "epoch": 1.2836454914076076, "grad_norm": 2.080575942993164, "learning_rate": 3.424817918241263e-05, "loss": 0.592, "step": 6648 }, { "epoch": 1.283838578876231, "grad_norm": 0.9846402406692505, "learning_rate": 3.4232187685843545e-05, "loss": 0.6593, "step": 6649 }, { "epoch": 1.284031666344854, "grad_norm": 0.893232524394989, "learning_rate": 3.421619798009877e-05, "loss": 0.5552, "step": 6650 }, { "epoch": 1.2842247538134774, "grad_norm": 0.7353765368461609, "learning_rate": 3.420021006699439e-05, "loss": 0.6019, "step": 6651 }, { "epoch": 1.2844178412821008, "grad_norm": 2.2726047039031982, "learning_rate": 3.418422394834616e-05, "loss": 0.6554, "step": 6652 }, { "epoch": 1.2846109287507241, "grad_norm": 3.4868059158325195, "learning_rate": 3.416823962596973e-05, "loss": 0.6462, "step": 6653 }, { "epoch": 1.2848040162193475, "grad_norm": 0.871853768825531, "learning_rate": 3.41522571016805e-05, "loss": 0.5405, "step": 6654 }, { "epoch": 1.2849971036879706, "grad_norm": 0.7878033518791199, "learning_rate": 3.41362763772937e-05, "loss": 0.6219, "step": 6655 }, { "epoch": 1.285190191156594, "grad_norm": 0.9986473321914673, "learning_rate": 3.4120297454624325e-05, "loss": 0.5854, "step": 6656 }, { "epoch": 1.2853832786252173, "grad_norm": 0.9306585192680359, "learning_rate": 3.410432033548719e-05, "loss": 0.6271, "step": 6657 }, { "epoch": 1.2855763660938404, "grad_norm": 0.9213270545005798, "learning_rate": 3.4088345021696864e-05, "loss": 0.6249, "step": 6658 }, { "epoch": 1.2857694535624637, "grad_norm": 1.0513780117034912, "learning_rate": 3.4072371515067755e-05, "loss": 0.6344, "step": 6659 }, { "epoch": 1.285962541031087, "grad_norm": 0.7379976511001587, "learning_rate": 3.4056399817414054e-05, "loss": 0.6365, "step": 6660 }, { "epoch": 1.2861556284997104, "grad_norm": 1.0401806831359863, "learning_rate": 3.4040429930549725e-05, "loss": 0.566, "step": 6661 }, { "epoch": 1.2863487159683338, "grad_norm": 1.5283968448638916, "learning_rate": 3.402446185628857e-05, "loss": 0.6009, "step": 6662 }, { "epoch": 1.286541803436957, "grad_norm": 0.9889905452728271, "learning_rate": 3.400849559644412e-05, "loss": 0.6337, "step": 6663 }, { "epoch": 1.2867348909055802, "grad_norm": 2.5083000659942627, "learning_rate": 3.399253115282977e-05, "loss": 0.6516, "step": 6664 }, { "epoch": 1.2869279783742036, "grad_norm": 0.7792137861251831, "learning_rate": 3.397656852725866e-05, "loss": 0.6276, "step": 6665 }, { "epoch": 1.2871210658428267, "grad_norm": 0.766717791557312, "learning_rate": 3.3960607721543736e-05, "loss": 0.5821, "step": 6666 }, { "epoch": 1.28731415331145, "grad_norm": 1.1189385652542114, "learning_rate": 3.394464873749776e-05, "loss": 0.6352, "step": 6667 }, { "epoch": 1.2875072407800734, "grad_norm": 0.6048223972320557, "learning_rate": 3.392869157693328e-05, "loss": 0.5681, "step": 6668 }, { "epoch": 1.2877003282486967, "grad_norm": 0.7608875632286072, "learning_rate": 3.391273624166258e-05, "loss": 0.638, "step": 6669 }, { "epoch": 1.28789341571732, "grad_norm": 0.6906620264053345, "learning_rate": 3.38967827334978e-05, "loss": 0.5989, "step": 6670 }, { "epoch": 1.2880865031859432, "grad_norm": 0.8970067501068115, "learning_rate": 3.3880831054250875e-05, "loss": 0.5702, "step": 6671 }, { "epoch": 1.2882795906545665, "grad_norm": 0.5574281811714172, "learning_rate": 3.386488120573349e-05, "loss": 0.6172, "step": 6672 }, { "epoch": 1.2884726781231899, "grad_norm": 0.7717649340629578, "learning_rate": 3.3848933189757156e-05, "loss": 0.6336, "step": 6673 }, { "epoch": 1.288665765591813, "grad_norm": 0.7203373312950134, "learning_rate": 3.3832987008133174e-05, "loss": 0.6539, "step": 6674 }, { "epoch": 1.2888588530604363, "grad_norm": 1.0328807830810547, "learning_rate": 3.3817042662672614e-05, "loss": 0.6447, "step": 6675 }, { "epoch": 1.2890519405290597, "grad_norm": 0.9193724393844604, "learning_rate": 3.3801100155186327e-05, "loss": 0.579, "step": 6676 }, { "epoch": 1.289245027997683, "grad_norm": 0.8543436527252197, "learning_rate": 3.378515948748501e-05, "loss": 0.6921, "step": 6677 }, { "epoch": 1.2894381154663062, "grad_norm": 0.8084989786148071, "learning_rate": 3.376922066137912e-05, "loss": 0.6389, "step": 6678 }, { "epoch": 1.2896312029349295, "grad_norm": 2.0632810592651367, "learning_rate": 3.375328367867887e-05, "loss": 0.5949, "step": 6679 }, { "epoch": 1.2898242904035528, "grad_norm": 0.8293903470039368, "learning_rate": 3.373734854119436e-05, "loss": 0.592, "step": 6680 }, { "epoch": 1.290017377872176, "grad_norm": 0.5888218879699707, "learning_rate": 3.3721415250735345e-05, "loss": 0.5592, "step": 6681 }, { "epoch": 1.2902104653407993, "grad_norm": 0.9379208087921143, "learning_rate": 3.370548380911149e-05, "loss": 0.6425, "step": 6682 }, { "epoch": 1.2904035528094226, "grad_norm": 1.1948734521865845, "learning_rate": 3.368955421813219e-05, "loss": 0.6326, "step": 6683 }, { "epoch": 1.290596640278046, "grad_norm": 0.6533461809158325, "learning_rate": 3.367362647960664e-05, "loss": 0.567, "step": 6684 }, { "epoch": 1.2907897277466693, "grad_norm": 1.0347883701324463, "learning_rate": 3.365770059534386e-05, "loss": 0.5894, "step": 6685 }, { "epoch": 1.2909828152152925, "grad_norm": 0.887136697769165, "learning_rate": 3.364177656715258e-05, "loss": 0.5771, "step": 6686 }, { "epoch": 1.2911759026839158, "grad_norm": 0.9248490333557129, "learning_rate": 3.362585439684138e-05, "loss": 0.6788, "step": 6687 }, { "epoch": 1.2913689901525391, "grad_norm": 14.598177909851074, "learning_rate": 3.360993408621863e-05, "loss": 0.5689, "step": 6688 }, { "epoch": 1.2915620776211623, "grad_norm": 0.7835547924041748, "learning_rate": 3.359401563709247e-05, "loss": 0.6893, "step": 6689 }, { "epoch": 1.2917551650897856, "grad_norm": 0.7781590819358826, "learning_rate": 3.3578099051270816e-05, "loss": 0.5966, "step": 6690 }, { "epoch": 1.291948252558409, "grad_norm": 0.9314383864402771, "learning_rate": 3.356218433056143e-05, "loss": 0.5747, "step": 6691 }, { "epoch": 1.2921413400270323, "grad_norm": 0.7552959322929382, "learning_rate": 3.354627147677177e-05, "loss": 0.6245, "step": 6692 }, { "epoch": 1.2923344274956556, "grad_norm": 1.1179829835891724, "learning_rate": 3.353036049170917e-05, "loss": 0.6452, "step": 6693 }, { "epoch": 1.2925275149642788, "grad_norm": 1.1135010719299316, "learning_rate": 3.3514451377180687e-05, "loss": 0.6673, "step": 6694 }, { "epoch": 1.292720602432902, "grad_norm": 0.5512592196464539, "learning_rate": 3.3498544134993225e-05, "loss": 0.5907, "step": 6695 }, { "epoch": 1.2929136899015254, "grad_norm": 1.1478756666183472, "learning_rate": 3.3482638766953417e-05, "loss": 0.5296, "step": 6696 }, { "epoch": 1.2931067773701486, "grad_norm": 0.6485825777053833, "learning_rate": 3.3466735274867746e-05, "loss": 0.5835, "step": 6697 }, { "epoch": 1.293299864838772, "grad_norm": 1.1815533638000488, "learning_rate": 3.345083366054239e-05, "loss": 0.636, "step": 6698 }, { "epoch": 1.2934929523073952, "grad_norm": 0.733460009098053, "learning_rate": 3.343493392578342e-05, "loss": 0.6126, "step": 6699 }, { "epoch": 1.2936860397760186, "grad_norm": 0.8537855744361877, "learning_rate": 3.3419036072396616e-05, "loss": 0.6109, "step": 6700 }, { "epoch": 1.293879127244642, "grad_norm": 1.4735603332519531, "learning_rate": 3.3403140102187574e-05, "loss": 0.6316, "step": 6701 }, { "epoch": 1.294072214713265, "grad_norm": 0.5804392695426941, "learning_rate": 3.33872460169617e-05, "loss": 0.6054, "step": 6702 }, { "epoch": 1.2942653021818884, "grad_norm": 0.9470624923706055, "learning_rate": 3.3371353818524144e-05, "loss": 0.598, "step": 6703 }, { "epoch": 1.2944583896505117, "grad_norm": 0.6873767375946045, "learning_rate": 3.335546350867983e-05, "loss": 0.6552, "step": 6704 }, { "epoch": 1.2946514771191349, "grad_norm": 0.7831783890724182, "learning_rate": 3.333957508923352e-05, "loss": 0.6295, "step": 6705 }, { "epoch": 1.2948445645877582, "grad_norm": 0.5446333885192871, "learning_rate": 3.332368856198975e-05, "loss": 0.6101, "step": 6706 }, { "epoch": 1.2950376520563815, "grad_norm": 0.7559564709663391, "learning_rate": 3.3307803928752804e-05, "loss": 0.6136, "step": 6707 }, { "epoch": 1.295230739525005, "grad_norm": 0.8344035744667053, "learning_rate": 3.329192119132679e-05, "loss": 0.6666, "step": 6708 }, { "epoch": 1.2954238269936282, "grad_norm": 0.8610531687736511, "learning_rate": 3.327604035151557e-05, "loss": 0.663, "step": 6709 }, { "epoch": 1.2956169144622514, "grad_norm": 0.8073675632476807, "learning_rate": 3.326016141112281e-05, "loss": 0.6615, "step": 6710 }, { "epoch": 1.2958100019308747, "grad_norm": 0.8736888766288757, "learning_rate": 3.324428437195196e-05, "loss": 0.5668, "step": 6711 }, { "epoch": 1.296003089399498, "grad_norm": 0.8738512396812439, "learning_rate": 3.322840923580625e-05, "loss": 0.6221, "step": 6712 }, { "epoch": 1.2961961768681212, "grad_norm": 0.6548253893852234, "learning_rate": 3.321253600448869e-05, "loss": 0.6589, "step": 6713 }, { "epoch": 1.2963892643367445, "grad_norm": 1.1001607179641724, "learning_rate": 3.3196664679802096e-05, "loss": 0.6586, "step": 6714 }, { "epoch": 1.2965823518053678, "grad_norm": 0.9068514108657837, "learning_rate": 3.3180795263549015e-05, "loss": 0.5669, "step": 6715 }, { "epoch": 1.2967754392739912, "grad_norm": 0.9693323373794556, "learning_rate": 3.316492775753182e-05, "loss": 0.5687, "step": 6716 }, { "epoch": 1.2969685267426145, "grad_norm": 0.8625608682632446, "learning_rate": 3.314906216355268e-05, "loss": 0.6513, "step": 6717 }, { "epoch": 1.2971616142112377, "grad_norm": 0.7728898525238037, "learning_rate": 3.31331984834135e-05, "loss": 0.6286, "step": 6718 }, { "epoch": 1.297354701679861, "grad_norm": 0.7704652547836304, "learning_rate": 3.3117336718915996e-05, "loss": 0.6176, "step": 6719 }, { "epoch": 1.2975477891484843, "grad_norm": 0.9169814586639404, "learning_rate": 3.3101476871861695e-05, "loss": 0.5443, "step": 6720 }, { "epoch": 1.2977408766171075, "grad_norm": 1.7043523788452148, "learning_rate": 3.308561894405185e-05, "loss": 0.5647, "step": 6721 }, { "epoch": 1.2979339640857308, "grad_norm": 1.7332123517990112, "learning_rate": 3.306976293728749e-05, "loss": 0.6728, "step": 6722 }, { "epoch": 1.2981270515543541, "grad_norm": 1.4757006168365479, "learning_rate": 3.30539088533695e-05, "loss": 0.6406, "step": 6723 }, { "epoch": 1.2983201390229775, "grad_norm": 0.865459144115448, "learning_rate": 3.303805669409848e-05, "loss": 0.5666, "step": 6724 }, { "epoch": 1.2985132264916006, "grad_norm": 2.020914077758789, "learning_rate": 3.302220646127487e-05, "loss": 0.5553, "step": 6725 }, { "epoch": 1.298706313960224, "grad_norm": 0.8030680418014526, "learning_rate": 3.30063581566988e-05, "loss": 0.627, "step": 6726 }, { "epoch": 1.2988994014288473, "grad_norm": 1.3438706398010254, "learning_rate": 3.2990511782170266e-05, "loss": 0.665, "step": 6727 }, { "epoch": 1.2990924888974704, "grad_norm": 1.0346755981445312, "learning_rate": 3.297466733948902e-05, "loss": 0.6154, "step": 6728 }, { "epoch": 1.2992855763660938, "grad_norm": 0.5268756151199341, "learning_rate": 3.2958824830454574e-05, "loss": 0.5798, "step": 6729 }, { "epoch": 1.299478663834717, "grad_norm": 0.8893669843673706, "learning_rate": 3.294298425686624e-05, "loss": 0.6436, "step": 6730 }, { "epoch": 1.2996717513033405, "grad_norm": 0.7770850658416748, "learning_rate": 3.292714562052313e-05, "loss": 0.6126, "step": 6731 }, { "epoch": 1.2998648387719638, "grad_norm": 0.5456533432006836, "learning_rate": 3.291130892322408e-05, "loss": 0.5698, "step": 6732 }, { "epoch": 1.300057926240587, "grad_norm": 0.8236610293388367, "learning_rate": 3.289547416676775e-05, "loss": 0.6202, "step": 6733 }, { "epoch": 1.3002510137092103, "grad_norm": 0.6288572549819946, "learning_rate": 3.287964135295257e-05, "loss": 0.6192, "step": 6734 }, { "epoch": 1.3004441011778336, "grad_norm": 0.5463008284568787, "learning_rate": 3.286381048357674e-05, "loss": 0.6414, "step": 6735 }, { "epoch": 1.3006371886464567, "grad_norm": 1.051960825920105, "learning_rate": 3.284798156043824e-05, "loss": 0.601, "step": 6736 }, { "epoch": 1.30083027611508, "grad_norm": 1.674296498298645, "learning_rate": 3.2832154585334876e-05, "loss": 0.6521, "step": 6737 }, { "epoch": 1.3010233635837034, "grad_norm": 1.1015781164169312, "learning_rate": 3.281632956006414e-05, "loss": 0.5746, "step": 6738 }, { "epoch": 1.3012164510523268, "grad_norm": 0.6003419160842896, "learning_rate": 3.2800506486423376e-05, "loss": 0.5873, "step": 6739 }, { "epoch": 1.30140953852095, "grad_norm": 0.5635243058204651, "learning_rate": 3.278468536620968e-05, "loss": 0.6092, "step": 6740 }, { "epoch": 1.3016026259895732, "grad_norm": 1.0909825563430786, "learning_rate": 3.276886620121994e-05, "loss": 0.5889, "step": 6741 }, { "epoch": 1.3017957134581966, "grad_norm": 0.819686770439148, "learning_rate": 3.27530489932508e-05, "loss": 0.5892, "step": 6742 }, { "epoch": 1.30198880092682, "grad_norm": 1.0935025215148926, "learning_rate": 3.2737233744098724e-05, "loss": 0.6037, "step": 6743 }, { "epoch": 1.302181888395443, "grad_norm": 1.5129860639572144, "learning_rate": 3.272142045555988e-05, "loss": 0.5938, "step": 6744 }, { "epoch": 1.3023749758640664, "grad_norm": 0.567198634147644, "learning_rate": 3.270560912943029e-05, "loss": 0.5621, "step": 6745 }, { "epoch": 1.3025680633326897, "grad_norm": 0.8767841458320618, "learning_rate": 3.2689799767505705e-05, "loss": 0.6282, "step": 6746 }, { "epoch": 1.302761150801313, "grad_norm": 0.7342284321784973, "learning_rate": 3.267399237158167e-05, "loss": 0.6407, "step": 6747 }, { "epoch": 1.3029542382699364, "grad_norm": 0.5729246735572815, "learning_rate": 3.265818694345353e-05, "loss": 0.5408, "step": 6748 }, { "epoch": 1.3031473257385595, "grad_norm": 0.6777838468551636, "learning_rate": 3.264238348491634e-05, "loss": 0.6498, "step": 6749 }, { "epoch": 1.3033404132071829, "grad_norm": 0.7247516512870789, "learning_rate": 3.262658199776498e-05, "loss": 0.657, "step": 6750 }, { "epoch": 1.3035335006758062, "grad_norm": 0.8091031312942505, "learning_rate": 3.2610782483794124e-05, "loss": 0.6108, "step": 6751 }, { "epoch": 1.3037265881444293, "grad_norm": 1.8086670637130737, "learning_rate": 3.259498494479819e-05, "loss": 0.5388, "step": 6752 }, { "epoch": 1.3039196756130527, "grad_norm": 0.5968474745750427, "learning_rate": 3.257918938257136e-05, "loss": 0.6364, "step": 6753 }, { "epoch": 1.304112763081676, "grad_norm": 0.5561448931694031, "learning_rate": 3.256339579890764e-05, "loss": 0.5845, "step": 6754 }, { "epoch": 1.3043058505502994, "grad_norm": 0.7571040391921997, "learning_rate": 3.254760419560075e-05, "loss": 0.5463, "step": 6755 }, { "epoch": 1.3044989380189227, "grad_norm": 1.1277670860290527, "learning_rate": 3.2531814574444244e-05, "loss": 0.6192, "step": 6756 }, { "epoch": 1.3046920254875458, "grad_norm": 0.6823803186416626, "learning_rate": 3.251602693723139e-05, "loss": 0.5815, "step": 6757 }, { "epoch": 1.3048851129561692, "grad_norm": 0.5177077651023865, "learning_rate": 3.250024128575528e-05, "loss": 0.6397, "step": 6758 }, { "epoch": 1.3050782004247925, "grad_norm": 0.9463492035865784, "learning_rate": 3.2484457621808783e-05, "loss": 0.5569, "step": 6759 }, { "epoch": 1.3052712878934156, "grad_norm": 0.7339701652526855, "learning_rate": 3.2468675947184525e-05, "loss": 0.603, "step": 6760 }, { "epoch": 1.305464375362039, "grad_norm": 0.6121624708175659, "learning_rate": 3.2452896263674856e-05, "loss": 0.6525, "step": 6761 }, { "epoch": 1.3056574628306623, "grad_norm": 0.6073617935180664, "learning_rate": 3.243711857307199e-05, "loss": 0.5493, "step": 6762 }, { "epoch": 1.3058505502992857, "grad_norm": 0.7200415134429932, "learning_rate": 3.242134287716787e-05, "loss": 0.6127, "step": 6763 }, { "epoch": 1.306043637767909, "grad_norm": 0.6490638256072998, "learning_rate": 3.24055691777542e-05, "loss": 0.6382, "step": 6764 }, { "epoch": 1.3062367252365321, "grad_norm": 0.7262925505638123, "learning_rate": 3.238979747662251e-05, "loss": 0.5859, "step": 6765 }, { "epoch": 1.3064298127051555, "grad_norm": 0.8452650308609009, "learning_rate": 3.2374027775564006e-05, "loss": 0.5893, "step": 6766 }, { "epoch": 1.3066229001737788, "grad_norm": 1.6650991439819336, "learning_rate": 3.235826007636979e-05, "loss": 0.6042, "step": 6767 }, { "epoch": 1.306815987642402, "grad_norm": 1.1800172328948975, "learning_rate": 3.2342494380830615e-05, "loss": 0.5797, "step": 6768 }, { "epoch": 1.3070090751110253, "grad_norm": 0.640116274356842, "learning_rate": 3.232673069073709e-05, "loss": 0.6254, "step": 6769 }, { "epoch": 1.3072021625796486, "grad_norm": 0.8342651128768921, "learning_rate": 3.231096900787959e-05, "loss": 0.5737, "step": 6770 }, { "epoch": 1.307395250048272, "grad_norm": 0.8452351689338684, "learning_rate": 3.2295209334048246e-05, "loss": 0.6208, "step": 6771 }, { "epoch": 1.307588337516895, "grad_norm": 0.566375732421875, "learning_rate": 3.227945167103291e-05, "loss": 0.5919, "step": 6772 }, { "epoch": 1.3077814249855184, "grad_norm": 0.7269399166107178, "learning_rate": 3.2263696020623284e-05, "loss": 0.6041, "step": 6773 }, { "epoch": 1.3079745124541418, "grad_norm": 0.5656101703643799, "learning_rate": 3.224794238460882e-05, "loss": 0.5982, "step": 6774 }, { "epoch": 1.3081675999227649, "grad_norm": 0.6300897598266602, "learning_rate": 3.2232190764778715e-05, "loss": 0.6508, "step": 6775 }, { "epoch": 1.3083606873913882, "grad_norm": 0.5715771317481995, "learning_rate": 3.221644116292197e-05, "loss": 0.5367, "step": 6776 }, { "epoch": 1.3085537748600116, "grad_norm": 14.762195587158203, "learning_rate": 3.220069358082734e-05, "loss": 0.5345, "step": 6777 }, { "epoch": 1.308746862328635, "grad_norm": 0.8264080882072449, "learning_rate": 3.218494802028333e-05, "loss": 0.6097, "step": 6778 }, { "epoch": 1.3089399497972583, "grad_norm": 0.9077831506729126, "learning_rate": 3.216920448307824e-05, "loss": 0.5462, "step": 6779 }, { "epoch": 1.3091330372658814, "grad_norm": 0.6178197264671326, "learning_rate": 3.215346297100017e-05, "loss": 0.6336, "step": 6780 }, { "epoch": 1.3093261247345047, "grad_norm": 1.798786997795105, "learning_rate": 3.213772348583692e-05, "loss": 0.6488, "step": 6781 }, { "epoch": 1.309519212203128, "grad_norm": 0.6178545951843262, "learning_rate": 3.2121986029376116e-05, "loss": 0.6211, "step": 6782 }, { "epoch": 1.3097122996717512, "grad_norm": 1.382070541381836, "learning_rate": 3.2106250603405155e-05, "loss": 0.6113, "step": 6783 }, { "epoch": 1.3099053871403745, "grad_norm": 0.6903949975967407, "learning_rate": 3.209051720971115e-05, "loss": 0.6008, "step": 6784 }, { "epoch": 1.3100984746089979, "grad_norm": 0.5371904969215393, "learning_rate": 3.2074785850081004e-05, "loss": 0.5649, "step": 6785 }, { "epoch": 1.3102915620776212, "grad_norm": 0.7433164119720459, "learning_rate": 3.2059056526301434e-05, "loss": 0.6601, "step": 6786 }, { "epoch": 1.3104846495462446, "grad_norm": 0.5928284525871277, "learning_rate": 3.204332924015889e-05, "loss": 0.6298, "step": 6787 }, { "epoch": 1.3106777370148677, "grad_norm": 1.0238986015319824, "learning_rate": 3.202760399343959e-05, "loss": 0.6222, "step": 6788 }, { "epoch": 1.310870824483491, "grad_norm": 0.7028841972351074, "learning_rate": 3.201188078792951e-05, "loss": 0.6309, "step": 6789 }, { "epoch": 1.3110639119521144, "grad_norm": 1.044106364250183, "learning_rate": 3.199615962541441e-05, "loss": 0.6539, "step": 6790 }, { "epoch": 1.3112569994207375, "grad_norm": 0.5218058824539185, "learning_rate": 3.198044050767984e-05, "loss": 0.6241, "step": 6791 }, { "epoch": 1.3114500868893608, "grad_norm": 0.7370857000350952, "learning_rate": 3.196472343651107e-05, "loss": 0.6252, "step": 6792 }, { "epoch": 1.3116431743579842, "grad_norm": 0.6009345650672913, "learning_rate": 3.194900841369316e-05, "loss": 0.6618, "step": 6793 }, { "epoch": 1.3118362618266075, "grad_norm": 1.2290891408920288, "learning_rate": 3.193329544101097e-05, "loss": 0.6528, "step": 6794 }, { "epoch": 1.3120293492952309, "grad_norm": 0.8846738934516907, "learning_rate": 3.191758452024907e-05, "loss": 0.6157, "step": 6795 }, { "epoch": 1.312222436763854, "grad_norm": 0.77024245262146, "learning_rate": 3.190187565319181e-05, "loss": 0.6242, "step": 6796 }, { "epoch": 1.3124155242324773, "grad_norm": 0.5694224834442139, "learning_rate": 3.188616884162334e-05, "loss": 0.5688, "step": 6797 }, { "epoch": 1.3126086117011007, "grad_norm": 0.5900863409042358, "learning_rate": 3.187046408732756e-05, "loss": 0.6635, "step": 6798 }, { "epoch": 1.3128016991697238, "grad_norm": 1.1731846332550049, "learning_rate": 3.1854761392088114e-05, "loss": 0.6342, "step": 6799 }, { "epoch": 1.3129947866383471, "grad_norm": 0.7339535355567932, "learning_rate": 3.183906075768847e-05, "loss": 0.5671, "step": 6800 }, { "epoch": 1.3131878741069705, "grad_norm": 0.7389934659004211, "learning_rate": 3.182336218591177e-05, "loss": 0.6207, "step": 6801 }, { "epoch": 1.3133809615755938, "grad_norm": 0.5352240204811096, "learning_rate": 3.180766567854101e-05, "loss": 0.598, "step": 6802 }, { "epoch": 1.3135740490442172, "grad_norm": 0.6616490483283997, "learning_rate": 3.179197123735889e-05, "loss": 0.5999, "step": 6803 }, { "epoch": 1.3137671365128403, "grad_norm": 7.520901203155518, "learning_rate": 3.177627886414792e-05, "loss": 0.5496, "step": 6804 }, { "epoch": 1.3139602239814636, "grad_norm": 0.626404345035553, "learning_rate": 3.176058856069037e-05, "loss": 0.5377, "step": 6805 }, { "epoch": 1.314153311450087, "grad_norm": 0.7968479990959167, "learning_rate": 3.174490032876824e-05, "loss": 0.6343, "step": 6806 }, { "epoch": 1.31434639891871, "grad_norm": 0.6517334580421448, "learning_rate": 3.172921417016331e-05, "loss": 0.6517, "step": 6807 }, { "epoch": 1.3145394863873334, "grad_norm": 0.669463038444519, "learning_rate": 3.171353008665713e-05, "loss": 0.6311, "step": 6808 }, { "epoch": 1.3147325738559568, "grad_norm": 0.9000421762466431, "learning_rate": 3.1697848080031045e-05, "loss": 0.636, "step": 6809 }, { "epoch": 1.31492566132458, "grad_norm": 0.9537325501441956, "learning_rate": 3.1682168152066096e-05, "loss": 0.6265, "step": 6810 }, { "epoch": 1.3151187487932035, "grad_norm": 7.853219509124756, "learning_rate": 3.1666490304543166e-05, "loss": 0.5781, "step": 6811 }, { "epoch": 1.3153118362618266, "grad_norm": 0.6632232666015625, "learning_rate": 3.165081453924282e-05, "loss": 0.5964, "step": 6812 }, { "epoch": 1.31550492373045, "grad_norm": 0.885865330696106, "learning_rate": 3.163514085794546e-05, "loss": 0.622, "step": 6813 }, { "epoch": 1.3156980111990733, "grad_norm": 5.15604829788208, "learning_rate": 3.16194692624312e-05, "loss": 0.6417, "step": 6814 }, { "epoch": 1.3158910986676964, "grad_norm": 0.5019404888153076, "learning_rate": 3.1603799754479935e-05, "loss": 0.5439, "step": 6815 }, { "epoch": 1.3160841861363197, "grad_norm": 1.0774413347244263, "learning_rate": 3.158813233587135e-05, "loss": 0.5811, "step": 6816 }, { "epoch": 1.316277273604943, "grad_norm": 1.0561386346817017, "learning_rate": 3.157246700838485e-05, "loss": 0.6382, "step": 6817 }, { "epoch": 1.3164703610735664, "grad_norm": 2.7306840419769287, "learning_rate": 3.1556803773799614e-05, "loss": 0.7167, "step": 6818 }, { "epoch": 1.3166634485421895, "grad_norm": 0.8465412855148315, "learning_rate": 3.154114263389459e-05, "loss": 0.5931, "step": 6819 }, { "epoch": 1.3168565360108129, "grad_norm": 0.8357062935829163, "learning_rate": 3.15254835904485e-05, "loss": 0.5997, "step": 6820 }, { "epoch": 1.3170496234794362, "grad_norm": 0.5885013341903687, "learning_rate": 3.1509826645239794e-05, "loss": 0.5981, "step": 6821 }, { "epoch": 1.3172427109480593, "grad_norm": 0.7398966550827026, "learning_rate": 3.149417180004674e-05, "loss": 0.6328, "step": 6822 }, { "epoch": 1.3174357984166827, "grad_norm": 0.8119408488273621, "learning_rate": 3.147851905664729e-05, "loss": 0.6824, "step": 6823 }, { "epoch": 1.317628885885306, "grad_norm": 0.8916471004486084, "learning_rate": 3.1462868416819234e-05, "loss": 0.6608, "step": 6824 }, { "epoch": 1.3178219733539294, "grad_norm": 0.531112790107727, "learning_rate": 3.144721988234006e-05, "loss": 0.6185, "step": 6825 }, { "epoch": 1.3180150608225527, "grad_norm": 1.3997817039489746, "learning_rate": 3.143157345498706e-05, "loss": 0.5786, "step": 6826 }, { "epoch": 1.3182081482911758, "grad_norm": 0.5339335203170776, "learning_rate": 3.141592913653727e-05, "loss": 0.5987, "step": 6827 }, { "epoch": 1.3184012357597992, "grad_norm": 0.6958271265029907, "learning_rate": 3.1400286928767507e-05, "loss": 0.6419, "step": 6828 }, { "epoch": 1.3185943232284225, "grad_norm": 0.6285881996154785, "learning_rate": 3.1384646833454294e-05, "loss": 0.6266, "step": 6829 }, { "epoch": 1.3187874106970456, "grad_norm": 3.146268606185913, "learning_rate": 3.1369008852373984e-05, "loss": 0.6748, "step": 6830 }, { "epoch": 1.318980498165669, "grad_norm": 0.6789501309394836, "learning_rate": 3.135337298730262e-05, "loss": 0.6037, "step": 6831 }, { "epoch": 1.3191735856342923, "grad_norm": 0.5791012048721313, "learning_rate": 3.133773924001606e-05, "loss": 0.6183, "step": 6832 }, { "epoch": 1.3193666731029157, "grad_norm": 2.3955371379852295, "learning_rate": 3.132210761228992e-05, "loss": 0.5851, "step": 6833 }, { "epoch": 1.319559760571539, "grad_norm": 0.7494352459907532, "learning_rate": 3.1306478105899544e-05, "loss": 0.5932, "step": 6834 }, { "epoch": 1.3197528480401621, "grad_norm": 3.7372472286224365, "learning_rate": 3.129085072262002e-05, "loss": 0.6073, "step": 6835 }, { "epoch": 1.3199459355087855, "grad_norm": 0.8410347104072571, "learning_rate": 3.1275225464226256e-05, "loss": 0.5741, "step": 6836 }, { "epoch": 1.3201390229774088, "grad_norm": 0.7283706068992615, "learning_rate": 3.125960233249289e-05, "loss": 0.6389, "step": 6837 }, { "epoch": 1.320332110446032, "grad_norm": 0.6453686952590942, "learning_rate": 3.124398132919428e-05, "loss": 0.6141, "step": 6838 }, { "epoch": 1.3205251979146553, "grad_norm": 0.6159746050834656, "learning_rate": 3.12283624561046e-05, "loss": 0.6588, "step": 6839 }, { "epoch": 1.3207182853832786, "grad_norm": 0.8632875084877014, "learning_rate": 3.121274571499778e-05, "loss": 0.6293, "step": 6840 }, { "epoch": 1.320911372851902, "grad_norm": 0.7671672701835632, "learning_rate": 3.119713110764746e-05, "loss": 0.6359, "step": 6841 }, { "epoch": 1.3211044603205253, "grad_norm": 0.9245070219039917, "learning_rate": 3.118151863582704e-05, "loss": 0.646, "step": 6842 }, { "epoch": 1.3212975477891484, "grad_norm": 0.9388509392738342, "learning_rate": 3.116590830130974e-05, "loss": 0.6428, "step": 6843 }, { "epoch": 1.3214906352577718, "grad_norm": 0.7446608543395996, "learning_rate": 3.115030010586849e-05, "loss": 0.5938, "step": 6844 }, { "epoch": 1.3216837227263951, "grad_norm": 4.755198001861572, "learning_rate": 3.113469405127598e-05, "loss": 0.62, "step": 6845 }, { "epoch": 1.3218768101950182, "grad_norm": 0.6267099380493164, "learning_rate": 3.111909013930468e-05, "loss": 0.6068, "step": 6846 }, { "epoch": 1.3220698976636416, "grad_norm": 2.704256296157837, "learning_rate": 3.110348837172677e-05, "loss": 0.5724, "step": 6847 }, { "epoch": 1.322262985132265, "grad_norm": 1.0429896116256714, "learning_rate": 3.1087888750314244e-05, "loss": 0.6309, "step": 6848 }, { "epoch": 1.3224560726008883, "grad_norm": 0.915234386920929, "learning_rate": 3.107229127683879e-05, "loss": 0.5479, "step": 6849 }, { "epoch": 1.3226491600695116, "grad_norm": 0.8114106059074402, "learning_rate": 3.1056695953071914e-05, "loss": 0.6332, "step": 6850 }, { "epoch": 1.3228422475381347, "grad_norm": 0.8711191415786743, "learning_rate": 3.104110278078486e-05, "loss": 0.6206, "step": 6851 }, { "epoch": 1.323035335006758, "grad_norm": 4.30318021774292, "learning_rate": 3.10255117617486e-05, "loss": 0.5569, "step": 6852 }, { "epoch": 1.3232284224753814, "grad_norm": 0.9339869618415833, "learning_rate": 3.100992289773387e-05, "loss": 0.5821, "step": 6853 }, { "epoch": 1.3234215099440045, "grad_norm": 1.0577168464660645, "learning_rate": 3.099433619051117e-05, "loss": 0.5894, "step": 6854 }, { "epoch": 1.3236145974126279, "grad_norm": 1.021363377571106, "learning_rate": 3.097875164185078e-05, "loss": 0.5309, "step": 6855 }, { "epoch": 1.3238076848812512, "grad_norm": 0.6229543685913086, "learning_rate": 3.0963169253522684e-05, "loss": 0.6052, "step": 6856 }, { "epoch": 1.3240007723498746, "grad_norm": 0.7572348713874817, "learning_rate": 3.0947589027296685e-05, "loss": 0.6106, "step": 6857 }, { "epoch": 1.324193859818498, "grad_norm": 0.8687805533409119, "learning_rate": 3.093201096494225e-05, "loss": 0.5998, "step": 6858 }, { "epoch": 1.324386947287121, "grad_norm": 1.0391005277633667, "learning_rate": 3.09164350682287e-05, "loss": 0.6386, "step": 6859 }, { "epoch": 1.3245800347557444, "grad_norm": 0.8262424468994141, "learning_rate": 3.090086133892502e-05, "loss": 0.6271, "step": 6860 }, { "epoch": 1.3247731222243677, "grad_norm": 0.7410253882408142, "learning_rate": 3.088528977880002e-05, "loss": 0.6333, "step": 6861 }, { "epoch": 1.3249662096929908, "grad_norm": 0.8817375898361206, "learning_rate": 3.086972038962223e-05, "loss": 0.5869, "step": 6862 }, { "epoch": 1.3251592971616142, "grad_norm": 0.7670608758926392, "learning_rate": 3.0854153173159946e-05, "loss": 0.6018, "step": 6863 }, { "epoch": 1.3253523846302375, "grad_norm": 1.4765796661376953, "learning_rate": 3.083858813118119e-05, "loss": 0.6453, "step": 6864 }, { "epoch": 1.3255454720988609, "grad_norm": 1.225938081741333, "learning_rate": 3.082302526545377e-05, "loss": 0.6487, "step": 6865 }, { "epoch": 1.325738559567484, "grad_norm": 0.584320604801178, "learning_rate": 3.080746457774522e-05, "loss": 0.5863, "step": 6866 }, { "epoch": 1.3259316470361073, "grad_norm": 1.050350308418274, "learning_rate": 3.079190606982285e-05, "loss": 0.5998, "step": 6867 }, { "epoch": 1.3261247345047307, "grad_norm": 0.9007635712623596, "learning_rate": 3.077634974345374e-05, "loss": 0.5924, "step": 6868 }, { "epoch": 1.3263178219733538, "grad_norm": 1.5183820724487305, "learning_rate": 3.076079560040465e-05, "loss": 0.623, "step": 6869 }, { "epoch": 1.3265109094419771, "grad_norm": 0.9799007177352905, "learning_rate": 3.074524364244215e-05, "loss": 0.5635, "step": 6870 }, { "epoch": 1.3267039969106005, "grad_norm": 0.6326213479042053, "learning_rate": 3.072969387133255e-05, "loss": 0.6165, "step": 6871 }, { "epoch": 1.3268970843792238, "grad_norm": 1.4920045137405396, "learning_rate": 3.0714146288841925e-05, "loss": 0.5258, "step": 6872 }, { "epoch": 1.3270901718478472, "grad_norm": 0.630389928817749, "learning_rate": 3.069860089673607e-05, "loss": 0.6136, "step": 6873 }, { "epoch": 1.3272832593164703, "grad_norm": 1.129332184791565, "learning_rate": 3.068305769678057e-05, "loss": 0.5363, "step": 6874 }, { "epoch": 1.3274763467850936, "grad_norm": 3.3234996795654297, "learning_rate": 3.0667516690740706e-05, "loss": 0.6367, "step": 6875 }, { "epoch": 1.327669434253717, "grad_norm": 1.258879542350769, "learning_rate": 3.0651977880381564e-05, "loss": 0.6032, "step": 6876 }, { "epoch": 1.32786252172234, "grad_norm": 0.8886156678199768, "learning_rate": 3.0636441267467955e-05, "loss": 0.6076, "step": 6877 }, { "epoch": 1.3280556091909634, "grad_norm": 7.642019271850586, "learning_rate": 3.062090685376443e-05, "loss": 0.6718, "step": 6878 }, { "epoch": 1.3282486966595868, "grad_norm": 0.5812510251998901, "learning_rate": 3.0605374641035347e-05, "loss": 0.5364, "step": 6879 }, { "epoch": 1.3284417841282101, "grad_norm": 1.427570104598999, "learning_rate": 3.058984463104475e-05, "loss": 0.5888, "step": 6880 }, { "epoch": 1.3286348715968335, "grad_norm": 3.2110817432403564, "learning_rate": 3.057431682555643e-05, "loss": 0.6, "step": 6881 }, { "epoch": 1.3288279590654566, "grad_norm": 1.0692145824432373, "learning_rate": 3.055879122633397e-05, "loss": 0.5507, "step": 6882 }, { "epoch": 1.32902104653408, "grad_norm": 0.7054784893989563, "learning_rate": 3.05432678351407e-05, "loss": 0.6048, "step": 6883 }, { "epoch": 1.3292141340027033, "grad_norm": 0.6588642001152039, "learning_rate": 3.0527746653739664e-05, "loss": 0.6041, "step": 6884 }, { "epoch": 1.3294072214713264, "grad_norm": 0.7685623168945312, "learning_rate": 3.0512227683893686e-05, "loss": 0.6405, "step": 6885 }, { "epoch": 1.3296003089399497, "grad_norm": 2.1384241580963135, "learning_rate": 3.049671092736534e-05, "loss": 0.6143, "step": 6886 }, { "epoch": 1.329793396408573, "grad_norm": 0.8577025532722473, "learning_rate": 3.0481196385916923e-05, "loss": 0.6243, "step": 6887 }, { "epoch": 1.3299864838771964, "grad_norm": 1.8306862115859985, "learning_rate": 3.0465684061310472e-05, "loss": 0.6074, "step": 6888 }, { "epoch": 1.3301795713458198, "grad_norm": 1.770463466644287, "learning_rate": 3.0450173955307822e-05, "loss": 0.5781, "step": 6889 }, { "epoch": 1.3303726588144429, "grad_norm": 0.7912010550498962, "learning_rate": 3.0434666069670532e-05, "loss": 0.6159, "step": 6890 }, { "epoch": 1.3305657462830662, "grad_norm": 0.8401220440864563, "learning_rate": 3.0419160406159907e-05, "loss": 0.6887, "step": 6891 }, { "epoch": 1.3307588337516896, "grad_norm": 0.9763790965080261, "learning_rate": 3.0403656966536965e-05, "loss": 0.5918, "step": 6892 }, { "epoch": 1.3309519212203127, "grad_norm": 0.7932782173156738, "learning_rate": 3.038815575256253e-05, "loss": 0.6526, "step": 6893 }, { "epoch": 1.331145008688936, "grad_norm": 1.0053859949111938, "learning_rate": 3.0372656765997147e-05, "loss": 0.6634, "step": 6894 }, { "epoch": 1.3313380961575594, "grad_norm": 0.9263525009155273, "learning_rate": 3.03571600086011e-05, "loss": 0.5666, "step": 6895 }, { "epoch": 1.3315311836261827, "grad_norm": 1.0769784450531006, "learning_rate": 3.034166548213443e-05, "loss": 0.6316, "step": 6896 }, { "epoch": 1.331724271094806, "grad_norm": 1.2355906963348389, "learning_rate": 3.0326173188356953e-05, "loss": 0.5886, "step": 6897 }, { "epoch": 1.3319173585634292, "grad_norm": 0.9298796653747559, "learning_rate": 3.031068312902816e-05, "loss": 0.6785, "step": 6898 }, { "epoch": 1.3321104460320525, "grad_norm": 0.8963666558265686, "learning_rate": 3.0295195305907332e-05, "loss": 0.5502, "step": 6899 }, { "epoch": 1.3323035335006759, "grad_norm": 0.7959970831871033, "learning_rate": 3.0279709720753513e-05, "loss": 0.5798, "step": 6900 }, { "epoch": 1.332496620969299, "grad_norm": 1.6246639490127563, "learning_rate": 3.026422637532547e-05, "loss": 0.5794, "step": 6901 }, { "epoch": 1.3326897084379223, "grad_norm": 0.6219362616539001, "learning_rate": 3.0248745271381713e-05, "loss": 0.5512, "step": 6902 }, { "epoch": 1.3328827959065457, "grad_norm": 1.1907991170883179, "learning_rate": 3.0233266410680517e-05, "loss": 0.6259, "step": 6903 }, { "epoch": 1.333075883375169, "grad_norm": 1.8719273805618286, "learning_rate": 3.0217789794979866e-05, "loss": 0.608, "step": 6904 }, { "epoch": 1.3332689708437924, "grad_norm": 0.9430632591247559, "learning_rate": 3.020231542603753e-05, "loss": 0.6817, "step": 6905 }, { "epoch": 1.3334620583124155, "grad_norm": 3.710623264312744, "learning_rate": 3.018684330561099e-05, "loss": 0.5797, "step": 6906 }, { "epoch": 1.3336551457810388, "grad_norm": 0.9209043383598328, "learning_rate": 3.0171373435457507e-05, "loss": 0.6014, "step": 6907 }, { "epoch": 1.3338482332496622, "grad_norm": 0.9111219644546509, "learning_rate": 3.0155905817334052e-05, "loss": 0.6382, "step": 6908 }, { "epoch": 1.3340413207182853, "grad_norm": 2.2154910564422607, "learning_rate": 3.0140440452997364e-05, "loss": 0.6008, "step": 6909 }, { "epoch": 1.3342344081869086, "grad_norm": 0.8725607395172119, "learning_rate": 3.0124977344203907e-05, "loss": 0.604, "step": 6910 }, { "epoch": 1.334427495655532, "grad_norm": 1.086142659187317, "learning_rate": 3.010951649270991e-05, "loss": 0.6293, "step": 6911 }, { "epoch": 1.3346205831241553, "grad_norm": 1.4934483766555786, "learning_rate": 3.0094057900271312e-05, "loss": 0.6397, "step": 6912 }, { "epoch": 1.3348136705927787, "grad_norm": 0.7777523398399353, "learning_rate": 3.007860156864384e-05, "loss": 0.6589, "step": 6913 }, { "epoch": 1.3350067580614018, "grad_norm": 1.0491359233856201, "learning_rate": 3.0063147499582955e-05, "loss": 0.6266, "step": 6914 }, { "epoch": 1.3351998455300251, "grad_norm": 0.7679900527000427, "learning_rate": 3.0047695694843814e-05, "loss": 0.6226, "step": 6915 }, { "epoch": 1.3353929329986483, "grad_norm": 10.452478408813477, "learning_rate": 3.003224615618136e-05, "loss": 0.5622, "step": 6916 }, { "epoch": 1.3355860204672716, "grad_norm": 0.9396721124649048, "learning_rate": 3.0016798885350273e-05, "loss": 0.6662, "step": 6917 }, { "epoch": 1.335779107935895, "grad_norm": 1.7358344793319702, "learning_rate": 3.000135388410499e-05, "loss": 0.5846, "step": 6918 }, { "epoch": 1.3359721954045183, "grad_norm": 1.7493183612823486, "learning_rate": 2.9985911154199635e-05, "loss": 0.6123, "step": 6919 }, { "epoch": 1.3361652828731416, "grad_norm": 1.5612648725509644, "learning_rate": 2.9970470697388163e-05, "loss": 0.5666, "step": 6920 }, { "epoch": 1.3363583703417647, "grad_norm": 1.6835861206054688, "learning_rate": 2.9955032515424164e-05, "loss": 0.6141, "step": 6921 }, { "epoch": 1.336551457810388, "grad_norm": 0.8750703930854797, "learning_rate": 2.993959661006106e-05, "loss": 0.6495, "step": 6922 }, { "epoch": 1.3367445452790114, "grad_norm": 1.0814790725708008, "learning_rate": 2.992416298305196e-05, "loss": 0.6603, "step": 6923 }, { "epoch": 1.3369376327476346, "grad_norm": 0.9836011528968811, "learning_rate": 2.9908731636149735e-05, "loss": 0.5847, "step": 6924 }, { "epoch": 1.337130720216258, "grad_norm": 1.1456300020217896, "learning_rate": 2.9893302571107017e-05, "loss": 0.6186, "step": 6925 }, { "epoch": 1.3373238076848812, "grad_norm": 0.9619402289390564, "learning_rate": 2.9877875789676158e-05, "loss": 0.6595, "step": 6926 }, { "epoch": 1.3375168951535046, "grad_norm": 1.1406649351119995, "learning_rate": 2.9862451293609206e-05, "loss": 0.5987, "step": 6927 }, { "epoch": 1.337709982622128, "grad_norm": 1.117595911026001, "learning_rate": 2.9847029084658023e-05, "loss": 0.6127, "step": 6928 }, { "epoch": 1.337903070090751, "grad_norm": 1.0887467861175537, "learning_rate": 2.9831609164574194e-05, "loss": 0.6064, "step": 6929 }, { "epoch": 1.3380961575593744, "grad_norm": 1.1597005128860474, "learning_rate": 2.9816191535109005e-05, "loss": 0.6218, "step": 6930 }, { "epoch": 1.3382892450279977, "grad_norm": 1.49708890914917, "learning_rate": 2.9800776198013537e-05, "loss": 0.6653, "step": 6931 }, { "epoch": 1.3384823324966209, "grad_norm": 1.915450930595398, "learning_rate": 2.978536315503855e-05, "loss": 0.6279, "step": 6932 }, { "epoch": 1.3386754199652442, "grad_norm": 0.8473633527755737, "learning_rate": 2.9769952407934598e-05, "loss": 0.5879, "step": 6933 }, { "epoch": 1.3388685074338675, "grad_norm": 0.8001275062561035, "learning_rate": 2.975454395845194e-05, "loss": 0.5917, "step": 6934 }, { "epoch": 1.3390615949024909, "grad_norm": 1.0588258504867554, "learning_rate": 2.973913780834059e-05, "loss": 0.5771, "step": 6935 }, { "epoch": 1.3392546823711142, "grad_norm": 1.024025797843933, "learning_rate": 2.9723733959350307e-05, "loss": 0.6676, "step": 6936 }, { "epoch": 1.3394477698397373, "grad_norm": 0.8142301440238953, "learning_rate": 2.970833241323058e-05, "loss": 0.5636, "step": 6937 }, { "epoch": 1.3396408573083607, "grad_norm": 1.8251475095748901, "learning_rate": 2.9692933171730602e-05, "loss": 0.6191, "step": 6938 }, { "epoch": 1.339833944776984, "grad_norm": 1.73193359375, "learning_rate": 2.967753623659936e-05, "loss": 0.6064, "step": 6939 }, { "epoch": 1.3400270322456072, "grad_norm": 0.9550012946128845, "learning_rate": 2.9662141609585564e-05, "loss": 0.6314, "step": 6940 }, { "epoch": 1.3402201197142305, "grad_norm": 0.797669529914856, "learning_rate": 2.9646749292437636e-05, "loss": 0.618, "step": 6941 }, { "epoch": 1.3404132071828538, "grad_norm": 1.8059953451156616, "learning_rate": 2.963135928690377e-05, "loss": 0.5955, "step": 6942 }, { "epoch": 1.3406062946514772, "grad_norm": 0.5699341893196106, "learning_rate": 2.961597159473187e-05, "loss": 0.5944, "step": 6943 }, { "epoch": 1.3407993821201005, "grad_norm": 1.232055425643921, "learning_rate": 2.9600586217669603e-05, "loss": 0.6113, "step": 6944 }, { "epoch": 1.3409924695887236, "grad_norm": 0.9546524882316589, "learning_rate": 2.958520315746433e-05, "loss": 0.5871, "step": 6945 }, { "epoch": 1.341185557057347, "grad_norm": 0.9144407510757446, "learning_rate": 2.956982241586321e-05, "loss": 0.5567, "step": 6946 }, { "epoch": 1.3413786445259703, "grad_norm": 1.0324442386627197, "learning_rate": 2.9554443994613067e-05, "loss": 0.6727, "step": 6947 }, { "epoch": 1.3415717319945935, "grad_norm": 1.1063731908798218, "learning_rate": 2.9539067895460538e-05, "loss": 0.6065, "step": 6948 }, { "epoch": 1.3417648194632168, "grad_norm": 0.6664263010025024, "learning_rate": 2.9523694120151956e-05, "loss": 0.6216, "step": 6949 }, { "epoch": 1.3419579069318401, "grad_norm": 1.2026100158691406, "learning_rate": 2.9508322670433375e-05, "loss": 0.5489, "step": 6950 }, { "epoch": 1.3421509944004635, "grad_norm": 0.7783233523368835, "learning_rate": 2.9492953548050593e-05, "loss": 0.6788, "step": 6951 }, { "epoch": 1.3423440818690868, "grad_norm": 1.2078216075897217, "learning_rate": 2.9477586754749177e-05, "loss": 0.5899, "step": 6952 }, { "epoch": 1.34253716933771, "grad_norm": 1.6235203742980957, "learning_rate": 2.9462222292274405e-05, "loss": 0.515, "step": 6953 }, { "epoch": 1.3427302568063333, "grad_norm": 1.0116479396820068, "learning_rate": 2.944686016237129e-05, "loss": 0.6437, "step": 6954 }, { "epoch": 1.3429233442749566, "grad_norm": 0.8164277672767639, "learning_rate": 2.943150036678456e-05, "loss": 0.6976, "step": 6955 }, { "epoch": 1.3431164317435798, "grad_norm": 0.8948092460632324, "learning_rate": 2.9416142907258704e-05, "loss": 0.58, "step": 6956 }, { "epoch": 1.343309519212203, "grad_norm": 1.2779780626296997, "learning_rate": 2.9400787785537968e-05, "loss": 0.6033, "step": 6957 }, { "epoch": 1.3435026066808264, "grad_norm": 1.9299159049987793, "learning_rate": 2.938543500336628e-05, "loss": 0.6389, "step": 6958 }, { "epoch": 1.3436956941494498, "grad_norm": 1.1664670705795288, "learning_rate": 2.9370084562487328e-05, "loss": 0.654, "step": 6959 }, { "epoch": 1.3438887816180731, "grad_norm": 0.9306238889694214, "learning_rate": 2.9354736464644562e-05, "loss": 0.6314, "step": 6960 }, { "epoch": 1.3440818690866962, "grad_norm": 0.7620834112167358, "learning_rate": 2.9339390711581105e-05, "loss": 0.6231, "step": 6961 }, { "epoch": 1.3442749565553196, "grad_norm": 0.8657540678977966, "learning_rate": 2.932404730503985e-05, "loss": 0.5999, "step": 6962 }, { "epoch": 1.3444680440239427, "grad_norm": 0.8484442234039307, "learning_rate": 2.9308706246763418e-05, "loss": 0.6208, "step": 6963 }, { "epoch": 1.344661131492566, "grad_norm": 0.7369292974472046, "learning_rate": 2.929336753849419e-05, "loss": 0.6636, "step": 6964 }, { "epoch": 1.3448542189611894, "grad_norm": 2.934720516204834, "learning_rate": 2.927803118197423e-05, "loss": 0.5604, "step": 6965 }, { "epoch": 1.3450473064298127, "grad_norm": 1.1173968315124512, "learning_rate": 2.9262697178945385e-05, "loss": 0.6448, "step": 6966 }, { "epoch": 1.345240393898436, "grad_norm": 1.014592170715332, "learning_rate": 2.9247365531149172e-05, "loss": 0.5306, "step": 6967 }, { "epoch": 1.3454334813670592, "grad_norm": 1.298753261566162, "learning_rate": 2.923203624032691e-05, "loss": 0.6163, "step": 6968 }, { "epoch": 1.3456265688356825, "grad_norm": 1.0974323749542236, "learning_rate": 2.921670930821959e-05, "loss": 0.6226, "step": 6969 }, { "epoch": 1.345819656304306, "grad_norm": 0.8502602577209473, "learning_rate": 2.9201384736567993e-05, "loss": 0.6133, "step": 6970 }, { "epoch": 1.346012743772929, "grad_norm": 0.9338769316673279, "learning_rate": 2.918606252711258e-05, "loss": 0.5247, "step": 6971 }, { "epoch": 1.3462058312415524, "grad_norm": 0.7591087222099304, "learning_rate": 2.917074268159361e-05, "loss": 0.5546, "step": 6972 }, { "epoch": 1.3463989187101757, "grad_norm": 0.758638322353363, "learning_rate": 2.9155425201750973e-05, "loss": 0.5529, "step": 6973 }, { "epoch": 1.346592006178799, "grad_norm": 0.8703638315200806, "learning_rate": 2.9140110089324357e-05, "loss": 0.6125, "step": 6974 }, { "epoch": 1.3467850936474224, "grad_norm": 1.0282542705535889, "learning_rate": 2.9124797346053194e-05, "loss": 0.5966, "step": 6975 }, { "epoch": 1.3469781811160455, "grad_norm": 0.8312037587165833, "learning_rate": 2.910948697367662e-05, "loss": 0.6432, "step": 6976 }, { "epoch": 1.3471712685846688, "grad_norm": 0.8764210343360901, "learning_rate": 2.9094178973933495e-05, "loss": 0.6188, "step": 6977 }, { "epoch": 1.3473643560532922, "grad_norm": 0.6437021493911743, "learning_rate": 2.907887334856243e-05, "loss": 0.6035, "step": 6978 }, { "epoch": 1.3475574435219153, "grad_norm": 0.9634280204772949, "learning_rate": 2.906357009930173e-05, "loss": 0.5947, "step": 6979 }, { "epoch": 1.3477505309905387, "grad_norm": 0.9772653579711914, "learning_rate": 2.9048269227889498e-05, "loss": 0.5435, "step": 6980 }, { "epoch": 1.347943618459162, "grad_norm": 1.5309662818908691, "learning_rate": 2.9032970736063502e-05, "loss": 0.6969, "step": 6981 }, { "epoch": 1.3481367059277853, "grad_norm": 0.9217801690101624, "learning_rate": 2.9017674625561252e-05, "loss": 0.6661, "step": 6982 }, { "epoch": 1.3483297933964087, "grad_norm": 1.2996008396148682, "learning_rate": 2.9002380898120053e-05, "loss": 0.6563, "step": 6983 }, { "epoch": 1.3485228808650318, "grad_norm": 1.1600322723388672, "learning_rate": 2.898708955547682e-05, "loss": 0.6475, "step": 6984 }, { "epoch": 1.3487159683336551, "grad_norm": 1.4465720653533936, "learning_rate": 2.8971800599368275e-05, "loss": 0.6189, "step": 6985 }, { "epoch": 1.3489090558022785, "grad_norm": 0.8116979598999023, "learning_rate": 2.895651403153089e-05, "loss": 0.5626, "step": 6986 }, { "epoch": 1.3491021432709016, "grad_norm": 0.8568193912506104, "learning_rate": 2.8941229853700814e-05, "loss": 0.6257, "step": 6987 }, { "epoch": 1.349295230739525, "grad_norm": 0.7667386531829834, "learning_rate": 2.892594806761392e-05, "loss": 0.6378, "step": 6988 }, { "epoch": 1.3494883182081483, "grad_norm": 0.717978835105896, "learning_rate": 2.8910668675005885e-05, "loss": 0.6218, "step": 6989 }, { "epoch": 1.3496814056767716, "grad_norm": 0.9178285598754883, "learning_rate": 2.8895391677612e-05, "loss": 0.6969, "step": 6990 }, { "epoch": 1.349874493145395, "grad_norm": 0.8396768569946289, "learning_rate": 2.888011707716739e-05, "loss": 0.681, "step": 6991 }, { "epoch": 1.350067580614018, "grad_norm": 0.9703666567802429, "learning_rate": 2.886484487540685e-05, "loss": 0.5476, "step": 6992 }, { "epoch": 1.3502606680826414, "grad_norm": 0.7061964869499207, "learning_rate": 2.8849575074064883e-05, "loss": 0.7006, "step": 6993 }, { "epoch": 1.3504537555512648, "grad_norm": 0.5729458332061768, "learning_rate": 2.8834307674875834e-05, "loss": 0.5611, "step": 6994 }, { "epoch": 1.350646843019888, "grad_norm": 1.0617157220840454, "learning_rate": 2.8819042679573617e-05, "loss": 0.574, "step": 6995 }, { "epoch": 1.3508399304885113, "grad_norm": 0.9488717317581177, "learning_rate": 2.8803780089891953e-05, "loss": 0.5647, "step": 6996 }, { "epoch": 1.3510330179571346, "grad_norm": 2.8022985458374023, "learning_rate": 2.878851990756432e-05, "loss": 0.552, "step": 6997 }, { "epoch": 1.351226105425758, "grad_norm": 0.7901433706283569, "learning_rate": 2.877326213432388e-05, "loss": 0.6415, "step": 6998 }, { "epoch": 1.3514191928943813, "grad_norm": 0.671694815158844, "learning_rate": 2.87580067719035e-05, "loss": 0.5944, "step": 6999 }, { "epoch": 1.3516122803630044, "grad_norm": 0.8845682144165039, "learning_rate": 2.8742753822035873e-05, "loss": 0.5804, "step": 7000 }, { "epoch": 1.3516122803630044, "eval_loss": 0.6603716015815735, "eval_runtime": 49.3158, "eval_samples_per_second": 13.464, "eval_steps_per_second": 0.426, "step": 7000 }, { "epoch": 1.3518053678316277, "grad_norm": 1.334876537322998, "learning_rate": 2.8727503286453262e-05, "loss": 0.6304, "step": 7001 }, { "epoch": 1.351998455300251, "grad_norm": 0.9840203523635864, "learning_rate": 2.8712255166887798e-05, "loss": 0.6004, "step": 7002 }, { "epoch": 1.3521915427688742, "grad_norm": 0.7489239573478699, "learning_rate": 2.8697009465071267e-05, "loss": 0.6467, "step": 7003 }, { "epoch": 1.3523846302374976, "grad_norm": 0.9219282269477844, "learning_rate": 2.868176618273517e-05, "loss": 0.5971, "step": 7004 }, { "epoch": 1.352577717706121, "grad_norm": 0.8147475123405457, "learning_rate": 2.8666525321610803e-05, "loss": 0.7049, "step": 7005 }, { "epoch": 1.3527708051747442, "grad_norm": 0.9500361680984497, "learning_rate": 2.865128688342914e-05, "loss": 0.6153, "step": 7006 }, { "epoch": 1.3529638926433676, "grad_norm": 0.8453713655471802, "learning_rate": 2.8636050869920816e-05, "loss": 0.5705, "step": 7007 }, { "epoch": 1.3531569801119907, "grad_norm": 1.7091424465179443, "learning_rate": 2.862081728281633e-05, "loss": 0.5962, "step": 7008 }, { "epoch": 1.353350067580614, "grad_norm": 0.8503004908561707, "learning_rate": 2.86055861238458e-05, "loss": 0.6276, "step": 7009 }, { "epoch": 1.3535431550492372, "grad_norm": 0.9054083228111267, "learning_rate": 2.8590357394739087e-05, "loss": 0.6211, "step": 7010 }, { "epoch": 1.3537362425178605, "grad_norm": 0.8249316811561584, "learning_rate": 2.8575131097225828e-05, "loss": 0.6194, "step": 7011 }, { "epoch": 1.3539293299864839, "grad_norm": 1.0282949209213257, "learning_rate": 2.855990723303532e-05, "loss": 0.5968, "step": 7012 }, { "epoch": 1.3541224174551072, "grad_norm": 1.2673041820526123, "learning_rate": 2.854468580389661e-05, "loss": 0.5569, "step": 7013 }, { "epoch": 1.3543155049237305, "grad_norm": 0.7541619539260864, "learning_rate": 2.8529466811538467e-05, "loss": 0.6083, "step": 7014 }, { "epoch": 1.3545085923923537, "grad_norm": 1.0092428922653198, "learning_rate": 2.851425025768939e-05, "loss": 0.6045, "step": 7015 }, { "epoch": 1.354701679860977, "grad_norm": 1.0279325246810913, "learning_rate": 2.8499036144077575e-05, "loss": 0.5727, "step": 7016 }, { "epoch": 1.3548947673296003, "grad_norm": 1.1604773998260498, "learning_rate": 2.8483824472431002e-05, "loss": 0.6159, "step": 7017 }, { "epoch": 1.3550878547982235, "grad_norm": 1.1527738571166992, "learning_rate": 2.846861524447727e-05, "loss": 0.568, "step": 7018 }, { "epoch": 1.3552809422668468, "grad_norm": 0.8005809187889099, "learning_rate": 2.845340846194382e-05, "loss": 0.5675, "step": 7019 }, { "epoch": 1.3554740297354702, "grad_norm": 0.6520674824714661, "learning_rate": 2.8438204126557733e-05, "loss": 0.5582, "step": 7020 }, { "epoch": 1.3556671172040935, "grad_norm": 0.8139907121658325, "learning_rate": 2.8423002240045814e-05, "loss": 0.639, "step": 7021 }, { "epoch": 1.3558602046727168, "grad_norm": 0.7396255731582642, "learning_rate": 2.8407802804134664e-05, "loss": 0.6351, "step": 7022 }, { "epoch": 1.35605329214134, "grad_norm": 1.1040339469909668, "learning_rate": 2.8392605820550537e-05, "loss": 0.6993, "step": 7023 }, { "epoch": 1.3562463796099633, "grad_norm": 0.9951575994491577, "learning_rate": 2.8377411291019386e-05, "loss": 0.6154, "step": 7024 }, { "epoch": 1.3564394670785866, "grad_norm": 0.6626946926116943, "learning_rate": 2.8362219217266963e-05, "loss": 0.6196, "step": 7025 }, { "epoch": 1.3566325545472098, "grad_norm": 0.9344784021377563, "learning_rate": 2.83470296010187e-05, "loss": 0.6127, "step": 7026 }, { "epoch": 1.3568256420158331, "grad_norm": 2.4240176677703857, "learning_rate": 2.8331842443999723e-05, "loss": 0.5708, "step": 7027 }, { "epoch": 1.3570187294844565, "grad_norm": 1.3227072954177856, "learning_rate": 2.831665774793496e-05, "loss": 0.661, "step": 7028 }, { "epoch": 1.3572118169530798, "grad_norm": 11.338337898254395, "learning_rate": 2.8301475514548976e-05, "loss": 0.6501, "step": 7029 }, { "epoch": 1.3574049044217031, "grad_norm": 0.6060382127761841, "learning_rate": 2.8286295745566095e-05, "loss": 0.6026, "step": 7030 }, { "epoch": 1.3575979918903263, "grad_norm": 1.0989662408828735, "learning_rate": 2.827111844271035e-05, "loss": 0.5873, "step": 7031 }, { "epoch": 1.3577910793589496, "grad_norm": 1.1340495347976685, "learning_rate": 2.825594360770548e-05, "loss": 0.6207, "step": 7032 }, { "epoch": 1.357984166827573, "grad_norm": 0.7359364628791809, "learning_rate": 2.8240771242275004e-05, "loss": 0.6635, "step": 7033 }, { "epoch": 1.358177254296196, "grad_norm": 1.0341371297836304, "learning_rate": 2.82256013481421e-05, "loss": 0.676, "step": 7034 }, { "epoch": 1.3583703417648194, "grad_norm": 0.8769217729568481, "learning_rate": 2.8210433927029677e-05, "loss": 0.5665, "step": 7035 }, { "epoch": 1.3585634292334428, "grad_norm": 0.82696932554245, "learning_rate": 2.8195268980660384e-05, "loss": 0.5651, "step": 7036 }, { "epoch": 1.358756516702066, "grad_norm": 0.8450479507446289, "learning_rate": 2.8180106510756555e-05, "loss": 0.5343, "step": 7037 }, { "epoch": 1.3589496041706894, "grad_norm": 0.9596859216690063, "learning_rate": 2.816494651904026e-05, "loss": 0.5991, "step": 7038 }, { "epoch": 1.3591426916393126, "grad_norm": 0.7210043668746948, "learning_rate": 2.8149789007233325e-05, "loss": 0.642, "step": 7039 }, { "epoch": 1.359335779107936, "grad_norm": 1.1665396690368652, "learning_rate": 2.8134633977057235e-05, "loss": 0.6282, "step": 7040 }, { "epoch": 1.3595288665765592, "grad_norm": 1.2489826679229736, "learning_rate": 2.811948143023322e-05, "loss": 0.6183, "step": 7041 }, { "epoch": 1.3597219540451824, "grad_norm": 0.9350616335868835, "learning_rate": 2.810433136848223e-05, "loss": 0.5531, "step": 7042 }, { "epoch": 1.3599150415138057, "grad_norm": 1.0610086917877197, "learning_rate": 2.808918379352491e-05, "loss": 0.6117, "step": 7043 }, { "epoch": 1.360108128982429, "grad_norm": 0.7919730544090271, "learning_rate": 2.807403870708166e-05, "loss": 0.6499, "step": 7044 }, { "epoch": 1.3603012164510524, "grad_norm": 1.2888926267623901, "learning_rate": 2.805889611087259e-05, "loss": 0.5999, "step": 7045 }, { "epoch": 1.3604943039196757, "grad_norm": 2.5959932804107666, "learning_rate": 2.80437560066175e-05, "loss": 0.6649, "step": 7046 }, { "epoch": 1.3606873913882989, "grad_norm": 0.7050210237503052, "learning_rate": 2.802861839603592e-05, "loss": 0.6057, "step": 7047 }, { "epoch": 1.3608804788569222, "grad_norm": 0.6164063215255737, "learning_rate": 2.80134832808471e-05, "loss": 0.5257, "step": 7048 }, { "epoch": 1.3610735663255455, "grad_norm": 0.9123704433441162, "learning_rate": 2.799835066276999e-05, "loss": 0.6672, "step": 7049 }, { "epoch": 1.3612666537941687, "grad_norm": 0.8317716121673584, "learning_rate": 2.7983220543523314e-05, "loss": 0.5927, "step": 7050 }, { "epoch": 1.361459741262792, "grad_norm": 0.9485567212104797, "learning_rate": 2.796809292482545e-05, "loss": 0.5892, "step": 7051 }, { "epoch": 1.3616528287314154, "grad_norm": 1.2183278799057007, "learning_rate": 2.7952967808394504e-05, "loss": 0.595, "step": 7052 }, { "epoch": 1.3618459162000387, "grad_norm": 0.747002363204956, "learning_rate": 2.7937845195948308e-05, "loss": 0.6022, "step": 7053 }, { "epoch": 1.362039003668662, "grad_norm": 0.7239018082618713, "learning_rate": 2.7922725089204426e-05, "loss": 0.7072, "step": 7054 }, { "epoch": 1.3622320911372852, "grad_norm": 0.7175042629241943, "learning_rate": 2.790760748988007e-05, "loss": 0.5989, "step": 7055 }, { "epoch": 1.3624251786059085, "grad_norm": 0.8255454301834106, "learning_rate": 2.7892492399692282e-05, "loss": 0.5479, "step": 7056 }, { "epoch": 1.3626182660745316, "grad_norm": 0.9011998772621155, "learning_rate": 2.7877379820357723e-05, "loss": 0.6388, "step": 7057 }, { "epoch": 1.362811353543155, "grad_norm": 1.1573845148086548, "learning_rate": 2.78622697535928e-05, "loss": 0.5409, "step": 7058 }, { "epoch": 1.3630044410117783, "grad_norm": 0.8537425398826599, "learning_rate": 2.784716220111363e-05, "loss": 0.6964, "step": 7059 }, { "epoch": 1.3631975284804017, "grad_norm": 0.8417124152183533, "learning_rate": 2.783205716463604e-05, "loss": 0.5619, "step": 7060 }, { "epoch": 1.363390615949025, "grad_norm": 0.9531185626983643, "learning_rate": 2.7816954645875608e-05, "loss": 0.6019, "step": 7061 }, { "epoch": 1.3635837034176481, "grad_norm": 1.8027703762054443, "learning_rate": 2.7801854646547587e-05, "loss": 0.5806, "step": 7062 }, { "epoch": 1.3637767908862715, "grad_norm": 0.9404968619346619, "learning_rate": 2.7786757168366943e-05, "loss": 0.5567, "step": 7063 }, { "epoch": 1.3639698783548948, "grad_norm": 0.6879907250404358, "learning_rate": 2.7771662213048376e-05, "loss": 0.5983, "step": 7064 }, { "epoch": 1.364162965823518, "grad_norm": 1.2384555339813232, "learning_rate": 2.7756569782306297e-05, "loss": 0.6468, "step": 7065 }, { "epoch": 1.3643560532921413, "grad_norm": 1.9291045665740967, "learning_rate": 2.7741479877854793e-05, "loss": 0.6141, "step": 7066 }, { "epoch": 1.3645491407607646, "grad_norm": 0.788163959980011, "learning_rate": 2.7726392501407734e-05, "loss": 0.6213, "step": 7067 }, { "epoch": 1.364742228229388, "grad_norm": 0.8763554692268372, "learning_rate": 2.7711307654678652e-05, "loss": 0.5589, "step": 7068 }, { "epoch": 1.3649353156980113, "grad_norm": 0.7648287415504456, "learning_rate": 2.76962253393808e-05, "loss": 0.5721, "step": 7069 }, { "epoch": 1.3651284031666344, "grad_norm": 0.8038166761398315, "learning_rate": 2.7681145557227145e-05, "loss": 0.6273, "step": 7070 }, { "epoch": 1.3653214906352578, "grad_norm": 0.7276261448860168, "learning_rate": 2.7666068309930353e-05, "loss": 0.6399, "step": 7071 }, { "epoch": 1.365514578103881, "grad_norm": 1.3111213445663452, "learning_rate": 2.7650993599202858e-05, "loss": 0.5832, "step": 7072 }, { "epoch": 1.3657076655725042, "grad_norm": 0.8156010508537292, "learning_rate": 2.763592142675674e-05, "loss": 0.5744, "step": 7073 }, { "epoch": 1.3659007530411276, "grad_norm": 1.1117660999298096, "learning_rate": 2.7620851794303802e-05, "loss": 0.6189, "step": 7074 }, { "epoch": 1.366093840509751, "grad_norm": 0.9676013588905334, "learning_rate": 2.7605784703555627e-05, "loss": 0.5394, "step": 7075 }, { "epoch": 1.3662869279783743, "grad_norm": 0.8055189251899719, "learning_rate": 2.7590720156223404e-05, "loss": 0.6014, "step": 7076 }, { "epoch": 1.3664800154469976, "grad_norm": 0.7353721261024475, "learning_rate": 2.757565815401807e-05, "loss": 0.6062, "step": 7077 }, { "epoch": 1.3666731029156207, "grad_norm": 1.0359660387039185, "learning_rate": 2.7560598698650342e-05, "loss": 0.6588, "step": 7078 }, { "epoch": 1.366866190384244, "grad_norm": 0.7168145179748535, "learning_rate": 2.7545541791830565e-05, "loss": 0.6393, "step": 7079 }, { "epoch": 1.3670592778528674, "grad_norm": 0.7124112248420715, "learning_rate": 2.7530487435268827e-05, "loss": 0.6379, "step": 7080 }, { "epoch": 1.3672523653214905, "grad_norm": 1.6416971683502197, "learning_rate": 2.751543563067491e-05, "loss": 0.6259, "step": 7081 }, { "epoch": 1.3674454527901139, "grad_norm": 0.9263790845870972, "learning_rate": 2.7500386379758315e-05, "loss": 0.6578, "step": 7082 }, { "epoch": 1.3676385402587372, "grad_norm": 0.877169668674469, "learning_rate": 2.7485339684228284e-05, "loss": 0.5859, "step": 7083 }, { "epoch": 1.3678316277273606, "grad_norm": 0.8261033892631531, "learning_rate": 2.7470295545793733e-05, "loss": 0.6043, "step": 7084 }, { "epoch": 1.368024715195984, "grad_norm": 0.85209721326828, "learning_rate": 2.745525396616326e-05, "loss": 0.5857, "step": 7085 }, { "epoch": 1.368217802664607, "grad_norm": 2.623443126678467, "learning_rate": 2.7440214947045272e-05, "loss": 0.5835, "step": 7086 }, { "epoch": 1.3684108901332304, "grad_norm": 0.8111853003501892, "learning_rate": 2.7425178490147768e-05, "loss": 0.6153, "step": 7087 }, { "epoch": 1.3686039776018537, "grad_norm": 1.5927941799163818, "learning_rate": 2.74101445971785e-05, "loss": 0.5969, "step": 7088 }, { "epoch": 1.3687970650704768, "grad_norm": 1.6551513671875, "learning_rate": 2.7395113269844985e-05, "loss": 0.6186, "step": 7089 }, { "epoch": 1.3689901525391002, "grad_norm": 0.6239065527915955, "learning_rate": 2.738008450985438e-05, "loss": 0.5731, "step": 7090 }, { "epoch": 1.3691832400077235, "grad_norm": 0.8379409313201904, "learning_rate": 2.736505831891355e-05, "loss": 0.5987, "step": 7091 }, { "epoch": 1.3693763274763469, "grad_norm": 0.9615848064422607, "learning_rate": 2.7350034698729144e-05, "loss": 0.5631, "step": 7092 }, { "epoch": 1.3695694149449702, "grad_norm": 0.9760701060295105, "learning_rate": 2.7335013651007396e-05, "loss": 0.6596, "step": 7093 }, { "epoch": 1.3697625024135933, "grad_norm": 1.9515351057052612, "learning_rate": 2.731999517745437e-05, "loss": 0.6378, "step": 7094 }, { "epoch": 1.3699555898822167, "grad_norm": 1.6914595365524292, "learning_rate": 2.7304979279775768e-05, "loss": 0.6364, "step": 7095 }, { "epoch": 1.37014867735084, "grad_norm": 0.7091649174690247, "learning_rate": 2.7289965959677012e-05, "loss": 0.565, "step": 7096 }, { "epoch": 1.3703417648194631, "grad_norm": 1.7136991024017334, "learning_rate": 2.727495521886322e-05, "loss": 0.5399, "step": 7097 }, { "epoch": 1.3705348522880865, "grad_norm": 1.8287450075149536, "learning_rate": 2.7259947059039282e-05, "loss": 0.5865, "step": 7098 }, { "epoch": 1.3707279397567098, "grad_norm": 1.1425132751464844, "learning_rate": 2.724494148190968e-05, "loss": 0.5493, "step": 7099 }, { "epoch": 1.3709210272253332, "grad_norm": 1.2887738943099976, "learning_rate": 2.7229938489178714e-05, "loss": 0.5782, "step": 7100 }, { "epoch": 1.3711141146939565, "grad_norm": 1.3946926593780518, "learning_rate": 2.7214938082550333e-05, "loss": 0.6217, "step": 7101 }, { "epoch": 1.3713072021625796, "grad_norm": 0.8363714218139648, "learning_rate": 2.719994026372818e-05, "loss": 0.6446, "step": 7102 }, { "epoch": 1.371500289631203, "grad_norm": 1.1676074266433716, "learning_rate": 2.7184945034415688e-05, "loss": 0.6274, "step": 7103 }, { "epoch": 1.371693377099826, "grad_norm": 1.2112592458724976, "learning_rate": 2.7169952396315872e-05, "loss": 0.5906, "step": 7104 }, { "epoch": 1.3718864645684494, "grad_norm": 2.4462430477142334, "learning_rate": 2.7154962351131524e-05, "loss": 0.5821, "step": 7105 }, { "epoch": 1.3720795520370728, "grad_norm": 0.6498629450798035, "learning_rate": 2.713997490056517e-05, "loss": 0.613, "step": 7106 }, { "epoch": 1.3722726395056961, "grad_norm": 1.1014721393585205, "learning_rate": 2.712499004631898e-05, "loss": 0.6384, "step": 7107 }, { "epoch": 1.3724657269743195, "grad_norm": 0.776785135269165, "learning_rate": 2.7110007790094842e-05, "loss": 0.6141, "step": 7108 }, { "epoch": 1.3726588144429426, "grad_norm": 2.925737142562866, "learning_rate": 2.709502813359442e-05, "loss": 0.57, "step": 7109 }, { "epoch": 1.372851901911566, "grad_norm": 1.0808923244476318, "learning_rate": 2.7080051078518935e-05, "loss": 0.5417, "step": 7110 }, { "epoch": 1.3730449893801893, "grad_norm": 1.2639720439910889, "learning_rate": 2.7065076626569475e-05, "loss": 0.6603, "step": 7111 }, { "epoch": 1.3732380768488124, "grad_norm": 1.047855257987976, "learning_rate": 2.7050104779446726e-05, "loss": 0.6156, "step": 7112 }, { "epoch": 1.3734311643174357, "grad_norm": 1.4658766984939575, "learning_rate": 2.7035135538851096e-05, "loss": 0.6729, "step": 7113 }, { "epoch": 1.373624251786059, "grad_norm": 1.5620614290237427, "learning_rate": 2.7020168906482756e-05, "loss": 0.5569, "step": 7114 }, { "epoch": 1.3738173392546824, "grad_norm": 1.289572834968567, "learning_rate": 2.700520488404153e-05, "loss": 0.6668, "step": 7115 }, { "epoch": 1.3740104267233058, "grad_norm": 2.9646081924438477, "learning_rate": 2.6990243473226895e-05, "loss": 0.6455, "step": 7116 }, { "epoch": 1.3742035141919289, "grad_norm": 0.7358741760253906, "learning_rate": 2.6975284675738144e-05, "loss": 0.5878, "step": 7117 }, { "epoch": 1.3743966016605522, "grad_norm": 2.255056381225586, "learning_rate": 2.6960328493274207e-05, "loss": 0.615, "step": 7118 }, { "epoch": 1.3745896891291756, "grad_norm": 0.9455053210258484, "learning_rate": 2.69453749275337e-05, "loss": 0.6168, "step": 7119 }, { "epoch": 1.3747827765977987, "grad_norm": 1.075303077697754, "learning_rate": 2.6930423980215036e-05, "loss": 0.6155, "step": 7120 }, { "epoch": 1.374975864066422, "grad_norm": 6.958988666534424, "learning_rate": 2.6915475653016176e-05, "loss": 0.6601, "step": 7121 }, { "epoch": 1.3751689515350454, "grad_norm": 1.0005491971969604, "learning_rate": 2.690052994763494e-05, "loss": 0.573, "step": 7122 }, { "epoch": 1.3753620390036687, "grad_norm": 0.8845051527023315, "learning_rate": 2.6885586865768754e-05, "loss": 0.5649, "step": 7123 }, { "epoch": 1.375555126472292, "grad_norm": 1.10254967212677, "learning_rate": 2.6870646409114758e-05, "loss": 0.6048, "step": 7124 }, { "epoch": 1.3757482139409152, "grad_norm": 1.0341308116912842, "learning_rate": 2.6855708579369855e-05, "loss": 0.6296, "step": 7125 }, { "epoch": 1.3759413014095385, "grad_norm": 0.6714518070220947, "learning_rate": 2.6840773378230587e-05, "loss": 0.6555, "step": 7126 }, { "epoch": 1.3761343888781619, "grad_norm": 0.7244880199432373, "learning_rate": 2.6825840807393177e-05, "loss": 0.5733, "step": 7127 }, { "epoch": 1.376327476346785, "grad_norm": 0.7643588185310364, "learning_rate": 2.681091086855363e-05, "loss": 0.5766, "step": 7128 }, { "epoch": 1.3765205638154083, "grad_norm": 1.0809026956558228, "learning_rate": 2.67959835634076e-05, "loss": 0.5846, "step": 7129 }, { "epoch": 1.3767136512840317, "grad_norm": 1.0099411010742188, "learning_rate": 2.6781058893650424e-05, "loss": 0.6716, "step": 7130 }, { "epoch": 1.376906738752655, "grad_norm": 0.7419795393943787, "learning_rate": 2.676613686097721e-05, "loss": 0.598, "step": 7131 }, { "epoch": 1.3770998262212784, "grad_norm": 0.7501850128173828, "learning_rate": 2.67512174670827e-05, "loss": 0.5732, "step": 7132 }, { "epoch": 1.3772929136899015, "grad_norm": 0.9949088096618652, "learning_rate": 2.6736300713661362e-05, "loss": 0.6059, "step": 7133 }, { "epoch": 1.3774860011585248, "grad_norm": 0.9261133074760437, "learning_rate": 2.6721386602407362e-05, "loss": 0.5763, "step": 7134 }, { "epoch": 1.3776790886271482, "grad_norm": 1.6870039701461792, "learning_rate": 2.6706475135014545e-05, "loss": 0.6463, "step": 7135 }, { "epoch": 1.3778721760957713, "grad_norm": 0.7959445714950562, "learning_rate": 2.6691566313176518e-05, "loss": 0.614, "step": 7136 }, { "epoch": 1.3780652635643946, "grad_norm": 0.7646054029464722, "learning_rate": 2.667666013858652e-05, "loss": 0.6281, "step": 7137 }, { "epoch": 1.378258351033018, "grad_norm": 0.6335840225219727, "learning_rate": 2.6661756612937522e-05, "loss": 0.6153, "step": 7138 }, { "epoch": 1.3784514385016413, "grad_norm": 1.225212574005127, "learning_rate": 2.6646855737922182e-05, "loss": 0.5952, "step": 7139 }, { "epoch": 1.3786445259702647, "grad_norm": 1.0113261938095093, "learning_rate": 2.6631957515232863e-05, "loss": 0.6265, "step": 7140 }, { "epoch": 1.3788376134388878, "grad_norm": 1.9089655876159668, "learning_rate": 2.6617061946561615e-05, "loss": 0.5942, "step": 7141 }, { "epoch": 1.3790307009075111, "grad_norm": 0.9446929693222046, "learning_rate": 2.660216903360022e-05, "loss": 0.5931, "step": 7142 }, { "epoch": 1.3792237883761345, "grad_norm": 0.8412346839904785, "learning_rate": 2.6587278778040137e-05, "loss": 0.6071, "step": 7143 }, { "epoch": 1.3794168758447576, "grad_norm": 0.822841465473175, "learning_rate": 2.65723911815725e-05, "loss": 0.5625, "step": 7144 }, { "epoch": 1.379609963313381, "grad_norm": 1.4085062742233276, "learning_rate": 2.6557506245888177e-05, "loss": 0.5816, "step": 7145 }, { "epoch": 1.3798030507820043, "grad_norm": 1.092523217201233, "learning_rate": 2.6542623972677717e-05, "loss": 0.5749, "step": 7146 }, { "epoch": 1.3799961382506276, "grad_norm": 1.6352399587631226, "learning_rate": 2.6527744363631357e-05, "loss": 0.6368, "step": 7147 }, { "epoch": 1.380189225719251, "grad_norm": 1.3236931562423706, "learning_rate": 2.651286742043907e-05, "loss": 0.6103, "step": 7148 }, { "epoch": 1.380382313187874, "grad_norm": 1.2443327903747559, "learning_rate": 2.6497993144790477e-05, "loss": 0.5512, "step": 7149 }, { "epoch": 1.3805754006564974, "grad_norm": 1.188206434249878, "learning_rate": 2.6483121538374946e-05, "loss": 0.599, "step": 7150 }, { "epoch": 1.3807684881251205, "grad_norm": 0.9161533117294312, "learning_rate": 2.646825260288148e-05, "loss": 0.6665, "step": 7151 }, { "epoch": 1.3809615755937439, "grad_norm": 2.661438226699829, "learning_rate": 2.6453386339998827e-05, "loss": 0.6541, "step": 7152 }, { "epoch": 1.3811546630623672, "grad_norm": 1.5294487476348877, "learning_rate": 2.643852275141544e-05, "loss": 0.5919, "step": 7153 }, { "epoch": 1.3813477505309906, "grad_norm": 1.3739663362503052, "learning_rate": 2.6423661838819424e-05, "loss": 0.6353, "step": 7154 }, { "epoch": 1.381540837999614, "grad_norm": 0.9174882769584656, "learning_rate": 2.6408803603898603e-05, "loss": 0.6093, "step": 7155 }, { "epoch": 1.381733925468237, "grad_norm": 1.3474303483963013, "learning_rate": 2.6393948048340512e-05, "loss": 0.6551, "step": 7156 }, { "epoch": 1.3819270129368604, "grad_norm": 2.187455654144287, "learning_rate": 2.6379095173832346e-05, "loss": 0.5976, "step": 7157 }, { "epoch": 1.3821201004054837, "grad_norm": 1.1945419311523438, "learning_rate": 2.6364244982061005e-05, "loss": 0.6769, "step": 7158 }, { "epoch": 1.3823131878741068, "grad_norm": 1.8608282804489136, "learning_rate": 2.6349397474713134e-05, "loss": 0.5888, "step": 7159 }, { "epoch": 1.3825062753427302, "grad_norm": 1.8324958086013794, "learning_rate": 2.6334552653475018e-05, "loss": 0.5725, "step": 7160 }, { "epoch": 1.3826993628113535, "grad_norm": 1.6677013635635376, "learning_rate": 2.6319710520032648e-05, "loss": 0.5525, "step": 7161 }, { "epoch": 1.3828924502799769, "grad_norm": 2.0640523433685303, "learning_rate": 2.6304871076071714e-05, "loss": 0.6479, "step": 7162 }, { "epoch": 1.3830855377486002, "grad_norm": 0.8386700749397278, "learning_rate": 2.6290034323277585e-05, "loss": 0.6294, "step": 7163 }, { "epoch": 1.3832786252172233, "grad_norm": 1.0812716484069824, "learning_rate": 2.6275200263335376e-05, "loss": 0.5625, "step": 7164 }, { "epoch": 1.3834717126858467, "grad_norm": 1.2087966203689575, "learning_rate": 2.6260368897929845e-05, "loss": 0.5686, "step": 7165 }, { "epoch": 1.38366480015447, "grad_norm": 1.513214349746704, "learning_rate": 2.6245540228745456e-05, "loss": 0.6299, "step": 7166 }, { "epoch": 1.3838578876230931, "grad_norm": 1.4952523708343506, "learning_rate": 2.6230714257466372e-05, "loss": 0.6228, "step": 7167 }, { "epoch": 1.3840509750917165, "grad_norm": 0.9488731622695923, "learning_rate": 2.621589098577645e-05, "loss": 0.625, "step": 7168 }, { "epoch": 1.3842440625603398, "grad_norm": 1.6093416213989258, "learning_rate": 2.6201070415359218e-05, "loss": 0.6554, "step": 7169 }, { "epoch": 1.3844371500289632, "grad_norm": 0.7600796222686768, "learning_rate": 2.618625254789795e-05, "loss": 0.6102, "step": 7170 }, { "epoch": 1.3846302374975865, "grad_norm": 1.9426631927490234, "learning_rate": 2.617143738507557e-05, "loss": 0.6371, "step": 7171 }, { "epoch": 1.3848233249662096, "grad_norm": 2.2656304836273193, "learning_rate": 2.6156624928574707e-05, "loss": 0.6204, "step": 7172 }, { "epoch": 1.385016412434833, "grad_norm": 0.8972819447517395, "learning_rate": 2.614181518007768e-05, "loss": 0.6443, "step": 7173 }, { "epoch": 1.3852094999034563, "grad_norm": 1.7401089668273926, "learning_rate": 2.6127008141266462e-05, "loss": 0.5983, "step": 7174 }, { "epoch": 1.3854025873720794, "grad_norm": 1.0941522121429443, "learning_rate": 2.6112203813822827e-05, "loss": 0.5854, "step": 7175 }, { "epoch": 1.3855956748407028, "grad_norm": 3.0304207801818848, "learning_rate": 2.6097402199428135e-05, "loss": 0.6164, "step": 7176 }, { "epoch": 1.3857887623093261, "grad_norm": 0.712145209312439, "learning_rate": 2.6082603299763474e-05, "loss": 0.6426, "step": 7177 }, { "epoch": 1.3859818497779495, "grad_norm": 0.9261152744293213, "learning_rate": 2.6067807116509637e-05, "loss": 0.6046, "step": 7178 }, { "epoch": 1.3861749372465728, "grad_norm": 1.2454564571380615, "learning_rate": 2.605301365134708e-05, "loss": 0.5936, "step": 7179 }, { "epoch": 1.386368024715196, "grad_norm": 1.1111396551132202, "learning_rate": 2.6038222905955955e-05, "loss": 0.596, "step": 7180 }, { "epoch": 1.3865611121838193, "grad_norm": 1.1918269395828247, "learning_rate": 2.6023434882016162e-05, "loss": 0.6155, "step": 7181 }, { "epoch": 1.3867541996524426, "grad_norm": 1.8659977912902832, "learning_rate": 2.6008649581207213e-05, "loss": 0.5747, "step": 7182 }, { "epoch": 1.3869472871210657, "grad_norm": 1.2071057558059692, "learning_rate": 2.599386700520835e-05, "loss": 0.5658, "step": 7183 }, { "epoch": 1.387140374589689, "grad_norm": 1.899997591972351, "learning_rate": 2.59790871556985e-05, "loss": 0.6206, "step": 7184 }, { "epoch": 1.3873334620583124, "grad_norm": 0.9058568477630615, "learning_rate": 2.5964310034356282e-05, "loss": 0.6525, "step": 7185 }, { "epoch": 1.3875265495269358, "grad_norm": 1.421119213104248, "learning_rate": 2.594953564285998e-05, "loss": 0.606, "step": 7186 }, { "epoch": 1.3877196369955591, "grad_norm": 1.1941275596618652, "learning_rate": 2.5934763982887633e-05, "loss": 0.5368, "step": 7187 }, { "epoch": 1.3879127244641822, "grad_norm": 2.6998071670532227, "learning_rate": 2.5919995056116897e-05, "loss": 0.5928, "step": 7188 }, { "epoch": 1.3881058119328056, "grad_norm": 1.1887969970703125, "learning_rate": 2.5905228864225162e-05, "loss": 0.6206, "step": 7189 }, { "epoch": 1.388298899401429, "grad_norm": 1.242180585861206, "learning_rate": 2.5890465408889485e-05, "loss": 0.5843, "step": 7190 }, { "epoch": 1.388491986870052, "grad_norm": 1.745827317237854, "learning_rate": 2.5875704691786612e-05, "loss": 0.6286, "step": 7191 }, { "epoch": 1.3886850743386754, "grad_norm": 2.015536308288574, "learning_rate": 2.5860946714593014e-05, "loss": 0.5348, "step": 7192 }, { "epoch": 1.3888781618072987, "grad_norm": 1.022047996520996, "learning_rate": 2.584619147898481e-05, "loss": 0.5386, "step": 7193 }, { "epoch": 1.389071249275922, "grad_norm": 1.0599629878997803, "learning_rate": 2.5831438986637803e-05, "loss": 0.621, "step": 7194 }, { "epoch": 1.3892643367445454, "grad_norm": 1.269142985343933, "learning_rate": 2.5816689239227556e-05, "loss": 0.6223, "step": 7195 }, { "epoch": 1.3894574242131685, "grad_norm": 1.25253164768219, "learning_rate": 2.580194223842921e-05, "loss": 0.6243, "step": 7196 }, { "epoch": 1.3896505116817919, "grad_norm": 3.9151463508605957, "learning_rate": 2.5787197985917654e-05, "loss": 0.6064, "step": 7197 }, { "epoch": 1.3898435991504152, "grad_norm": 1.2939010858535767, "learning_rate": 2.5772456483367497e-05, "loss": 0.5782, "step": 7198 }, { "epoch": 1.3900366866190383, "grad_norm": 1.6130387783050537, "learning_rate": 2.575771773245298e-05, "loss": 0.6564, "step": 7199 }, { "epoch": 1.3902297740876617, "grad_norm": 1.0698161125183105, "learning_rate": 2.5742981734848043e-05, "loss": 0.5506, "step": 7200 }, { "epoch": 1.390422861556285, "grad_norm": 1.0572288036346436, "learning_rate": 2.572824849222637e-05, "loss": 0.5853, "step": 7201 }, { "epoch": 1.3906159490249084, "grad_norm": 1.4808921813964844, "learning_rate": 2.5713518006261206e-05, "loss": 0.5807, "step": 7202 }, { "epoch": 1.3908090364935315, "grad_norm": 1.1759649515151978, "learning_rate": 2.5698790278625628e-05, "loss": 0.613, "step": 7203 }, { "epoch": 1.3910021239621548, "grad_norm": 0.9239940643310547, "learning_rate": 2.5684065310992312e-05, "loss": 0.5981, "step": 7204 }, { "epoch": 1.3911952114307782, "grad_norm": 1.7404800653457642, "learning_rate": 2.566934310503361e-05, "loss": 0.6573, "step": 7205 }, { "epoch": 1.3913882988994013, "grad_norm": 1.1572734117507935, "learning_rate": 2.565462366242167e-05, "loss": 0.5728, "step": 7206 }, { "epoch": 1.3915813863680246, "grad_norm": 1.1285446882247925, "learning_rate": 2.5639906984828167e-05, "loss": 0.6202, "step": 7207 }, { "epoch": 1.391774473836648, "grad_norm": 1.5736162662506104, "learning_rate": 2.5625193073924564e-05, "loss": 0.592, "step": 7208 }, { "epoch": 1.3919675613052713, "grad_norm": 1.4421682357788086, "learning_rate": 2.5610481931382012e-05, "loss": 0.6123, "step": 7209 }, { "epoch": 1.3921606487738947, "grad_norm": 1.0424634218215942, "learning_rate": 2.559577355887132e-05, "loss": 0.6622, "step": 7210 }, { "epoch": 1.3923537362425178, "grad_norm": 2.804807424545288, "learning_rate": 2.5581067958062955e-05, "loss": 0.6065, "step": 7211 }, { "epoch": 1.3925468237111411, "grad_norm": 1.4408884048461914, "learning_rate": 2.556636513062717e-05, "loss": 0.6164, "step": 7212 }, { "epoch": 1.3927399111797645, "grad_norm": 1.0307608842849731, "learning_rate": 2.5551665078233737e-05, "loss": 0.6613, "step": 7213 }, { "epoch": 1.3929329986483876, "grad_norm": 1.458239197731018, "learning_rate": 2.5536967802552292e-05, "loss": 0.5891, "step": 7214 }, { "epoch": 1.393126086117011, "grad_norm": 1.397181749343872, "learning_rate": 2.5522273305252037e-05, "loss": 0.575, "step": 7215 }, { "epoch": 1.3933191735856343, "grad_norm": 1.3813152313232422, "learning_rate": 2.5507581588001884e-05, "loss": 0.6503, "step": 7216 }, { "epoch": 1.3935122610542576, "grad_norm": 1.3236960172653198, "learning_rate": 2.5492892652470475e-05, "loss": 0.6194, "step": 7217 }, { "epoch": 1.393705348522881, "grad_norm": 1.1951863765716553, "learning_rate": 2.54782065003261e-05, "loss": 0.6705, "step": 7218 }, { "epoch": 1.393898435991504, "grad_norm": 0.9848229289054871, "learning_rate": 2.5463523133236684e-05, "loss": 0.6155, "step": 7219 }, { "epoch": 1.3940915234601274, "grad_norm": 0.9134037494659424, "learning_rate": 2.5448842552869932e-05, "loss": 0.578, "step": 7220 }, { "epoch": 1.3942846109287508, "grad_norm": 2.8460330963134766, "learning_rate": 2.5434164760893175e-05, "loss": 0.6572, "step": 7221 }, { "epoch": 1.394477698397374, "grad_norm": 1.125827431678772, "learning_rate": 2.5419489758973426e-05, "loss": 0.5749, "step": 7222 }, { "epoch": 1.3946707858659972, "grad_norm": 1.1998932361602783, "learning_rate": 2.5404817548777438e-05, "loss": 0.5568, "step": 7223 }, { "epoch": 1.3948638733346206, "grad_norm": 1.0817437171936035, "learning_rate": 2.5390148131971525e-05, "loss": 0.646, "step": 7224 }, { "epoch": 1.395056960803244, "grad_norm": 1.1868494749069214, "learning_rate": 2.537548151022183e-05, "loss": 0.5783, "step": 7225 }, { "epoch": 1.3952500482718673, "grad_norm": 2.7052955627441406, "learning_rate": 2.536081768519409e-05, "loss": 0.5471, "step": 7226 }, { "epoch": 1.3954431357404904, "grad_norm": 1.429131031036377, "learning_rate": 2.534615665855373e-05, "loss": 0.6058, "step": 7227 }, { "epoch": 1.3956362232091137, "grad_norm": 0.9286193251609802, "learning_rate": 2.5331498431965872e-05, "loss": 0.6306, "step": 7228 }, { "epoch": 1.395829310677737, "grad_norm": 0.9416036009788513, "learning_rate": 2.531684300709537e-05, "loss": 0.6317, "step": 7229 }, { "epoch": 1.3960223981463602, "grad_norm": 1.0749506950378418, "learning_rate": 2.5302190385606622e-05, "loss": 0.6529, "step": 7230 }, { "epoch": 1.3962154856149835, "grad_norm": 1.4089443683624268, "learning_rate": 2.5287540569163857e-05, "loss": 0.6362, "step": 7231 }, { "epoch": 1.3964085730836069, "grad_norm": 1.0645248889923096, "learning_rate": 2.527289355943092e-05, "loss": 0.6832, "step": 7232 }, { "epoch": 1.3966016605522302, "grad_norm": 3.0412979125976562, "learning_rate": 2.5258249358071302e-05, "loss": 0.567, "step": 7233 }, { "epoch": 1.3967947480208536, "grad_norm": 1.3378846645355225, "learning_rate": 2.5243607966748257e-05, "loss": 0.5488, "step": 7234 }, { "epoch": 1.3969878354894767, "grad_norm": 2.047077178955078, "learning_rate": 2.522896938712468e-05, "loss": 0.6135, "step": 7235 }, { "epoch": 1.3971809229581, "grad_norm": 0.8050738573074341, "learning_rate": 2.5214333620863086e-05, "loss": 0.6225, "step": 7236 }, { "epoch": 1.3973740104267234, "grad_norm": 1.2449486255645752, "learning_rate": 2.519970066962578e-05, "loss": 0.6661, "step": 7237 }, { "epoch": 1.3975670978953465, "grad_norm": 1.1395807266235352, "learning_rate": 2.518507053507468e-05, "loss": 0.5995, "step": 7238 }, { "epoch": 1.3977601853639698, "grad_norm": 0.8328953385353088, "learning_rate": 2.5170443218871375e-05, "loss": 0.6072, "step": 7239 }, { "epoch": 1.3979532728325932, "grad_norm": 0.7913397550582886, "learning_rate": 2.5155818722677204e-05, "loss": 0.6213, "step": 7240 }, { "epoch": 1.3981463603012165, "grad_norm": 0.8924975991249084, "learning_rate": 2.5141197048153116e-05, "loss": 0.571, "step": 7241 }, { "epoch": 1.3983394477698399, "grad_norm": 0.888815701007843, "learning_rate": 2.512657819695976e-05, "loss": 0.6311, "step": 7242 }, { "epoch": 1.398532535238463, "grad_norm": 1.1690484285354614, "learning_rate": 2.5111962170757474e-05, "loss": 0.5733, "step": 7243 }, { "epoch": 1.3987256227070863, "grad_norm": 1.0612930059432983, "learning_rate": 2.509734897120624e-05, "loss": 0.5648, "step": 7244 }, { "epoch": 1.3989187101757097, "grad_norm": 0.8338414430618286, "learning_rate": 2.5082738599965793e-05, "loss": 0.6187, "step": 7245 }, { "epoch": 1.3991117976443328, "grad_norm": 1.056645154953003, "learning_rate": 2.5068131058695503e-05, "loss": 0.6553, "step": 7246 }, { "epoch": 1.3993048851129561, "grad_norm": 1.10726797580719, "learning_rate": 2.5053526349054357e-05, "loss": 0.579, "step": 7247 }, { "epoch": 1.3994979725815795, "grad_norm": 0.96147221326828, "learning_rate": 2.5038924472701137e-05, "loss": 0.6005, "step": 7248 }, { "epoch": 1.3996910600502028, "grad_norm": 0.9188192486763, "learning_rate": 2.5024325431294227e-05, "loss": 0.5783, "step": 7249 }, { "epoch": 1.399884147518826, "grad_norm": 0.853991687297821, "learning_rate": 2.5009729226491685e-05, "loss": 0.6217, "step": 7250 }, { "epoch": 1.4000772349874493, "grad_norm": 1.2045084238052368, "learning_rate": 2.4995135859951324e-05, "loss": 0.5489, "step": 7251 }, { "epoch": 1.4002703224560726, "grad_norm": 1.7792478799819946, "learning_rate": 2.498054533333054e-05, "loss": 0.5984, "step": 7252 }, { "epoch": 1.4004634099246958, "grad_norm": 0.9940049648284912, "learning_rate": 2.4965957648286468e-05, "loss": 0.6335, "step": 7253 }, { "epoch": 1.400656497393319, "grad_norm": 1.5169066190719604, "learning_rate": 2.4951372806475888e-05, "loss": 0.5945, "step": 7254 }, { "epoch": 1.4008495848619424, "grad_norm": 1.5905083417892456, "learning_rate": 2.493679080955525e-05, "loss": 0.6764, "step": 7255 }, { "epoch": 1.4010426723305658, "grad_norm": 0.8902570605278015, "learning_rate": 2.4922211659180745e-05, "loss": 0.582, "step": 7256 }, { "epoch": 1.4012357597991891, "grad_norm": 0.9723817110061646, "learning_rate": 2.490763535700817e-05, "loss": 0.5474, "step": 7257 }, { "epoch": 1.4014288472678122, "grad_norm": 0.7054423689842224, "learning_rate": 2.489306190469302e-05, "loss": 0.5368, "step": 7258 }, { "epoch": 1.4016219347364356, "grad_norm": 1.2641785144805908, "learning_rate": 2.4878491303890483e-05, "loss": 0.5666, "step": 7259 }, { "epoch": 1.401815022205059, "grad_norm": 1.023695468902588, "learning_rate": 2.4863923556255404e-05, "loss": 0.6071, "step": 7260 }, { "epoch": 1.402008109673682, "grad_norm": 1.793779730796814, "learning_rate": 2.4849358663442284e-05, "loss": 0.6155, "step": 7261 }, { "epoch": 1.4022011971423054, "grad_norm": 1.2125331163406372, "learning_rate": 2.4834796627105376e-05, "loss": 0.5815, "step": 7262 }, { "epoch": 1.4023942846109287, "grad_norm": 2.754511594772339, "learning_rate": 2.4820237448898532e-05, "loss": 0.6231, "step": 7263 }, { "epoch": 1.402587372079552, "grad_norm": 1.7298507690429688, "learning_rate": 2.4805681130475312e-05, "loss": 0.5491, "step": 7264 }, { "epoch": 1.4027804595481754, "grad_norm": 1.037244439125061, "learning_rate": 2.479112767348894e-05, "loss": 0.5758, "step": 7265 }, { "epoch": 1.4029735470167986, "grad_norm": 0.9598087072372437, "learning_rate": 2.477657707959232e-05, "loss": 0.6815, "step": 7266 }, { "epoch": 1.403166634485422, "grad_norm": 2.2563514709472656, "learning_rate": 2.4762029350438015e-05, "loss": 0.6283, "step": 7267 }, { "epoch": 1.4033597219540452, "grad_norm": 0.7329695820808411, "learning_rate": 2.4747484487678318e-05, "loss": 0.6366, "step": 7268 }, { "epoch": 1.4035528094226684, "grad_norm": 1.1879961490631104, "learning_rate": 2.4732942492965132e-05, "loss": 0.6233, "step": 7269 }, { "epoch": 1.4037458968912917, "grad_norm": 1.5370104312896729, "learning_rate": 2.471840336795007e-05, "loss": 0.6645, "step": 7270 }, { "epoch": 1.403938984359915, "grad_norm": 0.8674675226211548, "learning_rate": 2.47038671142844e-05, "loss": 0.6026, "step": 7271 }, { "epoch": 1.4041320718285384, "grad_norm": 0.96119624376297, "learning_rate": 2.468933373361906e-05, "loss": 0.5516, "step": 7272 }, { "epoch": 1.4043251592971617, "grad_norm": 0.8739451169967651, "learning_rate": 2.4674803227604696e-05, "loss": 0.552, "step": 7273 }, { "epoch": 1.4045182467657849, "grad_norm": 1.0406898260116577, "learning_rate": 2.4660275597891613e-05, "loss": 0.6249, "step": 7274 }, { "epoch": 1.4047113342344082, "grad_norm": 2.4040799140930176, "learning_rate": 2.4645750846129756e-05, "loss": 0.592, "step": 7275 }, { "epoch": 1.4049044217030315, "grad_norm": 0.9267615079879761, "learning_rate": 2.463122897396879e-05, "loss": 0.5856, "step": 7276 }, { "epoch": 1.4050975091716547, "grad_norm": 4.063787460327148, "learning_rate": 2.4616709983058018e-05, "loss": 0.6108, "step": 7277 }, { "epoch": 1.405290596640278, "grad_norm": 0.812868595123291, "learning_rate": 2.4602193875046414e-05, "loss": 0.5807, "step": 7278 }, { "epoch": 1.4054836841089013, "grad_norm": 1.4500566720962524, "learning_rate": 2.4587680651582684e-05, "loss": 0.6136, "step": 7279 }, { "epoch": 1.4056767715775247, "grad_norm": 0.7958862781524658, "learning_rate": 2.4573170314315135e-05, "loss": 0.6285, "step": 7280 }, { "epoch": 1.405869859046148, "grad_norm": 0.8850162029266357, "learning_rate": 2.4558662864891785e-05, "loss": 0.5628, "step": 7281 }, { "epoch": 1.4060629465147712, "grad_norm": 2.2331597805023193, "learning_rate": 2.45441583049603e-05, "loss": 0.7036, "step": 7282 }, { "epoch": 1.4062560339833945, "grad_norm": 2.0418450832366943, "learning_rate": 2.4529656636168025e-05, "loss": 0.6463, "step": 7283 }, { "epoch": 1.4064491214520178, "grad_norm": 3.904874801635742, "learning_rate": 2.4515157860162006e-05, "loss": 0.5956, "step": 7284 }, { "epoch": 1.406642208920641, "grad_norm": 0.8366175293922424, "learning_rate": 2.4500661978588935e-05, "loss": 0.6621, "step": 7285 }, { "epoch": 1.4068352963892643, "grad_norm": 1.2699881792068481, "learning_rate": 2.4486168993095165e-05, "loss": 0.647, "step": 7286 }, { "epoch": 1.4070283838578876, "grad_norm": 2.4108026027679443, "learning_rate": 2.4471678905326735e-05, "loss": 0.5824, "step": 7287 }, { "epoch": 1.407221471326511, "grad_norm": 0.8754356503486633, "learning_rate": 2.445719171692935e-05, "loss": 0.6556, "step": 7288 }, { "epoch": 1.4074145587951343, "grad_norm": 0.9332308769226074, "learning_rate": 2.444270742954838e-05, "loss": 0.6359, "step": 7289 }, { "epoch": 1.4076076462637575, "grad_norm": 0.9153667688369751, "learning_rate": 2.4428226044828896e-05, "loss": 0.5943, "step": 7290 }, { "epoch": 1.4078007337323808, "grad_norm": 1.6292598247528076, "learning_rate": 2.44137475644156e-05, "loss": 0.5799, "step": 7291 }, { "epoch": 1.4079938212010041, "grad_norm": 1.0152873992919922, "learning_rate": 2.4399271989952894e-05, "loss": 0.5718, "step": 7292 }, { "epoch": 1.4081869086696273, "grad_norm": 1.223770022392273, "learning_rate": 2.438479932308483e-05, "loss": 0.6533, "step": 7293 }, { "epoch": 1.4083799961382506, "grad_norm": 0.9373011589050293, "learning_rate": 2.4370329565455108e-05, "loss": 0.5972, "step": 7294 }, { "epoch": 1.408573083606874, "grad_norm": 1.5933189392089844, "learning_rate": 2.435586271870718e-05, "loss": 0.6484, "step": 7295 }, { "epoch": 1.4087661710754973, "grad_norm": 0.7736430764198303, "learning_rate": 2.4341398784484082e-05, "loss": 0.6093, "step": 7296 }, { "epoch": 1.4089592585441204, "grad_norm": 1.2565456628799438, "learning_rate": 2.4326937764428536e-05, "loss": 0.507, "step": 7297 }, { "epoch": 1.4091523460127438, "grad_norm": 1.181718111038208, "learning_rate": 2.4312479660183006e-05, "loss": 0.629, "step": 7298 }, { "epoch": 1.409345433481367, "grad_norm": 1.249761700630188, "learning_rate": 2.429802447338951e-05, "loss": 0.6163, "step": 7299 }, { "epoch": 1.4095385209499902, "grad_norm": 0.9109782576560974, "learning_rate": 2.4283572205689787e-05, "loss": 0.6614, "step": 7300 }, { "epoch": 1.4097316084186136, "grad_norm": 1.6022260189056396, "learning_rate": 2.4269122858725297e-05, "loss": 0.615, "step": 7301 }, { "epoch": 1.409924695887237, "grad_norm": 0.7729251384735107, "learning_rate": 2.4254676434137085e-05, "loss": 0.5875, "step": 7302 }, { "epoch": 1.4101177833558602, "grad_norm": 0.7735885977745056, "learning_rate": 2.4240232933565894e-05, "loss": 0.5894, "step": 7303 }, { "epoch": 1.4103108708244836, "grad_norm": 0.7663779854774475, "learning_rate": 2.4225792358652194e-05, "loss": 0.5389, "step": 7304 }, { "epoch": 1.4105039582931067, "grad_norm": 1.1854649782180786, "learning_rate": 2.4211354711035988e-05, "loss": 0.5902, "step": 7305 }, { "epoch": 1.41069704576173, "grad_norm": 0.8374058604240417, "learning_rate": 2.4196919992357086e-05, "loss": 0.5981, "step": 7306 }, { "epoch": 1.4108901332303534, "grad_norm": 2.0208699703216553, "learning_rate": 2.4182488204254895e-05, "loss": 0.5737, "step": 7307 }, { "epoch": 1.4110832206989765, "grad_norm": 0.798764705657959, "learning_rate": 2.4168059348368488e-05, "loss": 0.6704, "step": 7308 }, { "epoch": 1.4112763081675999, "grad_norm": 0.6810080409049988, "learning_rate": 2.415363342633663e-05, "loss": 0.6396, "step": 7309 }, { "epoch": 1.4114693956362232, "grad_norm": 1.596165657043457, "learning_rate": 2.4139210439797733e-05, "loss": 0.5623, "step": 7310 }, { "epoch": 1.4116624831048465, "grad_norm": 0.8644590377807617, "learning_rate": 2.412479039038986e-05, "loss": 0.6926, "step": 7311 }, { "epoch": 1.4118555705734699, "grad_norm": 0.8047739863395691, "learning_rate": 2.411037327975082e-05, "loss": 0.6043, "step": 7312 }, { "epoch": 1.412048658042093, "grad_norm": 0.9400693774223328, "learning_rate": 2.409595910951799e-05, "loss": 0.6399, "step": 7313 }, { "epoch": 1.4122417455107164, "grad_norm": 1.0097577571868896, "learning_rate": 2.4081547881328452e-05, "loss": 0.5909, "step": 7314 }, { "epoch": 1.4124348329793397, "grad_norm": 0.8948362469673157, "learning_rate": 2.406713959681901e-05, "loss": 0.6929, "step": 7315 }, { "epoch": 1.4126279204479628, "grad_norm": 0.7854083180427551, "learning_rate": 2.4052734257626013e-05, "loss": 0.6171, "step": 7316 }, { "epoch": 1.4128210079165862, "grad_norm": 0.7291311025619507, "learning_rate": 2.403833186538556e-05, "loss": 0.5913, "step": 7317 }, { "epoch": 1.4130140953852095, "grad_norm": 2.0201127529144287, "learning_rate": 2.4023932421733425e-05, "loss": 0.6106, "step": 7318 }, { "epoch": 1.4132071828538328, "grad_norm": 0.7972196936607361, "learning_rate": 2.4009535928305003e-05, "loss": 0.5745, "step": 7319 }, { "epoch": 1.4134002703224562, "grad_norm": 1.3446781635284424, "learning_rate": 2.3995142386735354e-05, "loss": 0.6151, "step": 7320 }, { "epoch": 1.4135933577910793, "grad_norm": 1.173588514328003, "learning_rate": 2.3980751798659274e-05, "loss": 0.6447, "step": 7321 }, { "epoch": 1.4137864452597027, "grad_norm": 2.028104305267334, "learning_rate": 2.3966364165711096e-05, "loss": 0.596, "step": 7322 }, { "epoch": 1.413979532728326, "grad_norm": 0.7951305508613586, "learning_rate": 2.395197948952495e-05, "loss": 0.6223, "step": 7323 }, { "epoch": 1.4141726201969491, "grad_norm": 1.0766853094100952, "learning_rate": 2.393759777173455e-05, "loss": 0.6097, "step": 7324 }, { "epoch": 1.4143657076655725, "grad_norm": 0.613574743270874, "learning_rate": 2.3923219013973276e-05, "loss": 0.6021, "step": 7325 }, { "epoch": 1.4145587951341958, "grad_norm": 0.7521234750747681, "learning_rate": 2.3908843217874223e-05, "loss": 0.6368, "step": 7326 }, { "epoch": 1.4147518826028191, "grad_norm": 0.8503754734992981, "learning_rate": 2.3894470385070127e-05, "loss": 0.6689, "step": 7327 }, { "epoch": 1.4149449700714425, "grad_norm": 0.7848847508430481, "learning_rate": 2.3880100517193315e-05, "loss": 0.6086, "step": 7328 }, { "epoch": 1.4151380575400656, "grad_norm": 0.9651969075202942, "learning_rate": 2.3865733615875902e-05, "loss": 0.5511, "step": 7329 }, { "epoch": 1.415331145008689, "grad_norm": 1.2042673826217651, "learning_rate": 2.385136968274958e-05, "loss": 0.6112, "step": 7330 }, { "epoch": 1.4155242324773123, "grad_norm": 0.9401514530181885, "learning_rate": 2.3837008719445714e-05, "loss": 0.5791, "step": 7331 }, { "epoch": 1.4157173199459354, "grad_norm": 1.023417353630066, "learning_rate": 2.3822650727595398e-05, "loss": 0.6263, "step": 7332 }, { "epoch": 1.4159104074145588, "grad_norm": 2.0102436542510986, "learning_rate": 2.3808295708829266e-05, "loss": 0.6061, "step": 7333 }, { "epoch": 1.416103494883182, "grad_norm": 0.8813250064849854, "learning_rate": 2.3793943664777736e-05, "loss": 0.6147, "step": 7334 }, { "epoch": 1.4162965823518054, "grad_norm": 0.9651735424995422, "learning_rate": 2.3779594597070826e-05, "loss": 0.6209, "step": 7335 }, { "epoch": 1.4164896698204288, "grad_norm": 0.9170966744422913, "learning_rate": 2.3765248507338195e-05, "loss": 0.568, "step": 7336 }, { "epoch": 1.416682757289052, "grad_norm": 1.0011768341064453, "learning_rate": 2.3750905397209243e-05, "loss": 0.5856, "step": 7337 }, { "epoch": 1.4168758447576753, "grad_norm": 0.9050262570381165, "learning_rate": 2.3736565268312987e-05, "loss": 0.5576, "step": 7338 }, { "epoch": 1.4170689322262986, "grad_norm": 0.9247788190841675, "learning_rate": 2.372222812227804e-05, "loss": 0.5736, "step": 7339 }, { "epoch": 1.4172620196949217, "grad_norm": 1.035683274269104, "learning_rate": 2.3707893960732797e-05, "loss": 0.6135, "step": 7340 }, { "epoch": 1.417455107163545, "grad_norm": 1.240851879119873, "learning_rate": 2.3693562785305234e-05, "loss": 0.6188, "step": 7341 }, { "epoch": 1.4176481946321684, "grad_norm": 0.6330093741416931, "learning_rate": 2.3679234597622996e-05, "loss": 0.6025, "step": 7342 }, { "epoch": 1.4178412821007917, "grad_norm": 0.861304521560669, "learning_rate": 2.3664909399313434e-05, "loss": 0.6328, "step": 7343 }, { "epoch": 1.4180343695694149, "grad_norm": 0.712585985660553, "learning_rate": 2.3650587192003516e-05, "loss": 0.5937, "step": 7344 }, { "epoch": 1.4182274570380382, "grad_norm": 1.5152524709701538, "learning_rate": 2.363626797731988e-05, "loss": 0.6142, "step": 7345 }, { "epoch": 1.4184205445066616, "grad_norm": 2.488804578781128, "learning_rate": 2.3621951756888822e-05, "loss": 0.6456, "step": 7346 }, { "epoch": 1.4186136319752847, "grad_norm": 1.5365188121795654, "learning_rate": 2.3607638532336292e-05, "loss": 0.6028, "step": 7347 }, { "epoch": 1.418806719443908, "grad_norm": 2.5076546669006348, "learning_rate": 2.3593328305287937e-05, "loss": 0.5338, "step": 7348 }, { "epoch": 1.4189998069125314, "grad_norm": 0.8351921439170837, "learning_rate": 2.3579021077369046e-05, "loss": 0.5789, "step": 7349 }, { "epoch": 1.4191928943811547, "grad_norm": 0.9254668951034546, "learning_rate": 2.3564716850204495e-05, "loss": 0.6234, "step": 7350 }, { "epoch": 1.419385981849778, "grad_norm": 0.584664523601532, "learning_rate": 2.3550415625418942e-05, "loss": 0.5666, "step": 7351 }, { "epoch": 1.4195790693184012, "grad_norm": 1.7093652486801147, "learning_rate": 2.353611740463662e-05, "loss": 0.6314, "step": 7352 }, { "epoch": 1.4197721567870245, "grad_norm": 1.195844054222107, "learning_rate": 2.3521822189481435e-05, "loss": 0.6209, "step": 7353 }, { "epoch": 1.4199652442556479, "grad_norm": 1.1236965656280518, "learning_rate": 2.3507529981576996e-05, "loss": 0.5504, "step": 7354 }, { "epoch": 1.420158331724271, "grad_norm": 1.4795666933059692, "learning_rate": 2.3493240782546533e-05, "loss": 0.5883, "step": 7355 }, { "epoch": 1.4203514191928943, "grad_norm": 0.8199212551116943, "learning_rate": 2.347895459401288e-05, "loss": 0.5745, "step": 7356 }, { "epoch": 1.4205445066615177, "grad_norm": 2.1288747787475586, "learning_rate": 2.346467141759865e-05, "loss": 0.626, "step": 7357 }, { "epoch": 1.420737594130141, "grad_norm": 1.3846244812011719, "learning_rate": 2.3450391254926024e-05, "loss": 0.6029, "step": 7358 }, { "epoch": 1.4209306815987643, "grad_norm": 0.7044923305511475, "learning_rate": 2.3436114107616846e-05, "loss": 0.6024, "step": 7359 }, { "epoch": 1.4211237690673875, "grad_norm": 0.875550389289856, "learning_rate": 2.3421839977292686e-05, "loss": 0.6293, "step": 7360 }, { "epoch": 1.4213168565360108, "grad_norm": 0.6743024587631226, "learning_rate": 2.3407568865574704e-05, "loss": 0.6782, "step": 7361 }, { "epoch": 1.4215099440046342, "grad_norm": 0.9789609909057617, "learning_rate": 2.3393300774083727e-05, "loss": 0.6283, "step": 7362 }, { "epoch": 1.4217030314732573, "grad_norm": 1.573602318763733, "learning_rate": 2.3379035704440256e-05, "loss": 0.6204, "step": 7363 }, { "epoch": 1.4218961189418806, "grad_norm": 1.2780990600585938, "learning_rate": 2.3364773658264428e-05, "loss": 0.6459, "step": 7364 }, { "epoch": 1.422089206410504, "grad_norm": 0.9393353462219238, "learning_rate": 2.3350514637176074e-05, "loss": 0.6886, "step": 7365 }, { "epoch": 1.4222822938791273, "grad_norm": 1.6912211179733276, "learning_rate": 2.333625864279465e-05, "loss": 0.6263, "step": 7366 }, { "epoch": 1.4224753813477506, "grad_norm": 1.4792068004608154, "learning_rate": 2.332200567673928e-05, "loss": 0.6655, "step": 7367 }, { "epoch": 1.4226684688163738, "grad_norm": 1.0498303174972534, "learning_rate": 2.3307755740628724e-05, "loss": 0.6089, "step": 7368 }, { "epoch": 1.422861556284997, "grad_norm": 1.399096965789795, "learning_rate": 2.3293508836081423e-05, "loss": 0.6123, "step": 7369 }, { "epoch": 1.4230546437536205, "grad_norm": 0.7307160496711731, "learning_rate": 2.3279264964715454e-05, "loss": 0.6334, "step": 7370 }, { "epoch": 1.4232477312222436, "grad_norm": 1.3555041551589966, "learning_rate": 2.3265024128148588e-05, "loss": 0.6244, "step": 7371 }, { "epoch": 1.423440818690867, "grad_norm": 0.7267115712165833, "learning_rate": 2.3250786327998204e-05, "loss": 0.5592, "step": 7372 }, { "epoch": 1.4236339061594903, "grad_norm": 1.0435391664505005, "learning_rate": 2.323655156588136e-05, "loss": 0.6475, "step": 7373 }, { "epoch": 1.4238269936281136, "grad_norm": 0.8128313422203064, "learning_rate": 2.3222319843414763e-05, "loss": 0.6766, "step": 7374 }, { "epoch": 1.424020081096737, "grad_norm": 6.083514213562012, "learning_rate": 2.3208091162214756e-05, "loss": 0.56, "step": 7375 }, { "epoch": 1.42421316856536, "grad_norm": 1.017595887184143, "learning_rate": 2.3193865523897402e-05, "loss": 0.6031, "step": 7376 }, { "epoch": 1.4244062560339834, "grad_norm": 0.7751060128211975, "learning_rate": 2.3179642930078343e-05, "loss": 0.5407, "step": 7377 }, { "epoch": 1.4245993435026068, "grad_norm": 1.6931145191192627, "learning_rate": 2.3165423382372915e-05, "loss": 0.6106, "step": 7378 }, { "epoch": 1.4247924309712299, "grad_norm": 0.7946437001228333, "learning_rate": 2.315120688239609e-05, "loss": 0.5625, "step": 7379 }, { "epoch": 1.4249855184398532, "grad_norm": 1.074411392211914, "learning_rate": 2.3136993431762515e-05, "loss": 0.5741, "step": 7380 }, { "epoch": 1.4251786059084766, "grad_norm": 0.8112348914146423, "learning_rate": 2.3122783032086447e-05, "loss": 0.5751, "step": 7381 }, { "epoch": 1.4253716933771, "grad_norm": 0.792572021484375, "learning_rate": 2.3108575684981876e-05, "loss": 0.6403, "step": 7382 }, { "epoch": 1.4255647808457232, "grad_norm": 0.875709593296051, "learning_rate": 2.309437139206237e-05, "loss": 0.6351, "step": 7383 }, { "epoch": 1.4257578683143464, "grad_norm": 1.6684366464614868, "learning_rate": 2.308017015494118e-05, "loss": 0.6634, "step": 7384 }, { "epoch": 1.4259509557829697, "grad_norm": 1.4871963262557983, "learning_rate": 2.3065971975231204e-05, "loss": 0.56, "step": 7385 }, { "epoch": 1.426144043251593, "grad_norm": 0.7173557877540588, "learning_rate": 2.305177685454498e-05, "loss": 0.55, "step": 7386 }, { "epoch": 1.4263371307202162, "grad_norm": 0.9970060586929321, "learning_rate": 2.3037584794494754e-05, "loss": 0.5826, "step": 7387 }, { "epoch": 1.4265302181888395, "grad_norm": 0.7386762499809265, "learning_rate": 2.3023395796692367e-05, "loss": 0.6099, "step": 7388 }, { "epoch": 1.4267233056574629, "grad_norm": 8.543675422668457, "learning_rate": 2.300920986274932e-05, "loss": 0.6853, "step": 7389 }, { "epoch": 1.4269163931260862, "grad_norm": 0.8586966395378113, "learning_rate": 2.2995026994276784e-05, "loss": 0.6322, "step": 7390 }, { "epoch": 1.4271094805947093, "grad_norm": 1.6868013143539429, "learning_rate": 2.298084719288558e-05, "loss": 0.5997, "step": 7391 }, { "epoch": 1.4273025680633327, "grad_norm": 1.5477826595306396, "learning_rate": 2.2966670460186142e-05, "loss": 0.623, "step": 7392 }, { "epoch": 1.427495655531956, "grad_norm": 1.0680872201919556, "learning_rate": 2.295249679778863e-05, "loss": 0.6121, "step": 7393 }, { "epoch": 1.4276887430005791, "grad_norm": 1.137036919593811, "learning_rate": 2.29383262073028e-05, "loss": 0.6074, "step": 7394 }, { "epoch": 1.4278818304692025, "grad_norm": 0.891628086566925, "learning_rate": 2.292415869033807e-05, "loss": 0.5996, "step": 7395 }, { "epoch": 1.4280749179378258, "grad_norm": 1.7063766717910767, "learning_rate": 2.2909994248503508e-05, "loss": 0.5898, "step": 7396 }, { "epoch": 1.4282680054064492, "grad_norm": 1.1588720083236694, "learning_rate": 2.2895832883407835e-05, "loss": 0.596, "step": 7397 }, { "epoch": 1.4284610928750725, "grad_norm": 0.932425320148468, "learning_rate": 2.2881674596659407e-05, "loss": 0.5625, "step": 7398 }, { "epoch": 1.4286541803436956, "grad_norm": 1.2401045560836792, "learning_rate": 2.2867519389866294e-05, "loss": 0.6655, "step": 7399 }, { "epoch": 1.428847267812319, "grad_norm": 0.8940187692642212, "learning_rate": 2.2853367264636132e-05, "loss": 0.5674, "step": 7400 }, { "epoch": 1.4290403552809423, "grad_norm": 1.5858137607574463, "learning_rate": 2.283921822257626e-05, "loss": 0.6553, "step": 7401 }, { "epoch": 1.4292334427495654, "grad_norm": 1.3800225257873535, "learning_rate": 2.282507226529364e-05, "loss": 0.6314, "step": 7402 }, { "epoch": 1.4294265302181888, "grad_norm": 2.2191660404205322, "learning_rate": 2.2810929394394886e-05, "loss": 0.6643, "step": 7403 }, { "epoch": 1.4296196176868121, "grad_norm": 0.8971442580223083, "learning_rate": 2.2796789611486303e-05, "loss": 0.6222, "step": 7404 }, { "epoch": 1.4298127051554355, "grad_norm": 1.0184376239776611, "learning_rate": 2.2782652918173784e-05, "loss": 0.608, "step": 7405 }, { "epoch": 1.4300057926240588, "grad_norm": 0.8633356094360352, "learning_rate": 2.2768519316062896e-05, "loss": 0.6078, "step": 7406 }, { "epoch": 1.430198880092682, "grad_norm": 1.2803882360458374, "learning_rate": 2.275438880675891e-05, "loss": 0.5848, "step": 7407 }, { "epoch": 1.4303919675613053, "grad_norm": 0.7052181363105774, "learning_rate": 2.2740261391866637e-05, "loss": 0.6544, "step": 7408 }, { "epoch": 1.4305850550299286, "grad_norm": 0.9178266525268555, "learning_rate": 2.2726137072990588e-05, "loss": 0.6379, "step": 7409 }, { "epoch": 1.4307781424985517, "grad_norm": 6.532191276550293, "learning_rate": 2.2712015851734976e-05, "loss": 0.6148, "step": 7410 }, { "epoch": 1.430971229967175, "grad_norm": 0.9071924090385437, "learning_rate": 2.2697897729703583e-05, "loss": 0.6062, "step": 7411 }, { "epoch": 1.4311643174357984, "grad_norm": 0.8872568011283875, "learning_rate": 2.2683782708499884e-05, "loss": 0.6483, "step": 7412 }, { "epoch": 1.4313574049044218, "grad_norm": 1.0369806289672852, "learning_rate": 2.2669670789726976e-05, "loss": 0.5405, "step": 7413 }, { "epoch": 1.431550492373045, "grad_norm": 1.0939829349517822, "learning_rate": 2.265556197498761e-05, "loss": 0.6687, "step": 7414 }, { "epoch": 1.4317435798416682, "grad_norm": 1.4351184368133545, "learning_rate": 2.2641456265884214e-05, "loss": 0.6319, "step": 7415 }, { "epoch": 1.4319366673102916, "grad_norm": 0.7090451717376709, "learning_rate": 2.262735366401883e-05, "loss": 0.6149, "step": 7416 }, { "epoch": 1.432129754778915, "grad_norm": 1.1701568365097046, "learning_rate": 2.261325417099313e-05, "loss": 0.6244, "step": 7417 }, { "epoch": 1.432322842247538, "grad_norm": 0.849638819694519, "learning_rate": 2.2599157788408528e-05, "loss": 0.5992, "step": 7418 }, { "epoch": 1.4325159297161614, "grad_norm": 0.83155757188797, "learning_rate": 2.258506451786595e-05, "loss": 0.6055, "step": 7419 }, { "epoch": 1.4327090171847847, "grad_norm": 1.0616378784179688, "learning_rate": 2.2570974360966035e-05, "loss": 0.595, "step": 7420 }, { "epoch": 1.432902104653408, "grad_norm": 1.157160997390747, "learning_rate": 2.2556887319309105e-05, "loss": 0.6308, "step": 7421 }, { "epoch": 1.4330951921220314, "grad_norm": 0.8602142930030823, "learning_rate": 2.2542803394495084e-05, "loss": 0.6522, "step": 7422 }, { "epoch": 1.4332882795906545, "grad_norm": 0.7415817379951477, "learning_rate": 2.2528722588123514e-05, "loss": 0.6295, "step": 7423 }, { "epoch": 1.4334813670592779, "grad_norm": 1.2196576595306396, "learning_rate": 2.2514644901793685e-05, "loss": 0.6649, "step": 7424 }, { "epoch": 1.4336744545279012, "grad_norm": 0.7771625518798828, "learning_rate": 2.2500570337104392e-05, "loss": 0.6071, "step": 7425 }, { "epoch": 1.4338675419965243, "grad_norm": 0.9007619619369507, "learning_rate": 2.2486498895654197e-05, "loss": 0.6169, "step": 7426 }, { "epoch": 1.4340606294651477, "grad_norm": 0.950972855091095, "learning_rate": 2.2472430579041247e-05, "loss": 0.5177, "step": 7427 }, { "epoch": 1.434253716933771, "grad_norm": 1.1717432737350464, "learning_rate": 2.245836538886333e-05, "loss": 0.6092, "step": 7428 }, { "epoch": 1.4344468044023944, "grad_norm": 0.7357762455940247, "learning_rate": 2.2444303326717924e-05, "loss": 0.6373, "step": 7429 }, { "epoch": 1.4346398918710177, "grad_norm": 0.7495874166488647, "learning_rate": 2.243024439420213e-05, "loss": 0.6235, "step": 7430 }, { "epoch": 1.4348329793396408, "grad_norm": 0.7725561857223511, "learning_rate": 2.2416188592912636e-05, "loss": 0.5966, "step": 7431 }, { "epoch": 1.4350260668082642, "grad_norm": 1.2832450866699219, "learning_rate": 2.2402135924445866e-05, "loss": 0.6206, "step": 7432 }, { "epoch": 1.4352191542768875, "grad_norm": 1.2146275043487549, "learning_rate": 2.238808639039784e-05, "loss": 0.6095, "step": 7433 }, { "epoch": 1.4354122417455106, "grad_norm": 0.9594151973724365, "learning_rate": 2.237403999236421e-05, "loss": 0.5456, "step": 7434 }, { "epoch": 1.435605329214134, "grad_norm": 1.1150282621383667, "learning_rate": 2.235999673194035e-05, "loss": 0.5917, "step": 7435 }, { "epoch": 1.4357984166827573, "grad_norm": 2.9833807945251465, "learning_rate": 2.234595661072113e-05, "loss": 0.6668, "step": 7436 }, { "epoch": 1.4359915041513807, "grad_norm": 0.7140949368476868, "learning_rate": 2.2331919630301225e-05, "loss": 0.6298, "step": 7437 }, { "epoch": 1.4361845916200038, "grad_norm": 0.9730687141418457, "learning_rate": 2.231788579227485e-05, "loss": 0.5926, "step": 7438 }, { "epoch": 1.4363776790886271, "grad_norm": 0.6492225527763367, "learning_rate": 2.2303855098235905e-05, "loss": 0.5655, "step": 7439 }, { "epoch": 1.4365707665572505, "grad_norm": 1.037927269935608, "learning_rate": 2.2289827549777893e-05, "loss": 0.5835, "step": 7440 }, { "epoch": 1.4367638540258736, "grad_norm": 0.7396480441093445, "learning_rate": 2.2275803148494047e-05, "loss": 0.5854, "step": 7441 }, { "epoch": 1.436956941494497, "grad_norm": 0.6533260941505432, "learning_rate": 2.2261781895977112e-05, "loss": 0.5965, "step": 7442 }, { "epoch": 1.4371500289631203, "grad_norm": 0.6887895464897156, "learning_rate": 2.2247763793819603e-05, "loss": 0.6663, "step": 7443 }, { "epoch": 1.4373431164317436, "grad_norm": 0.8802280426025391, "learning_rate": 2.22337488436136e-05, "loss": 0.6325, "step": 7444 }, { "epoch": 1.437536203900367, "grad_norm": 2.693922519683838, "learning_rate": 2.2219737046950834e-05, "loss": 0.5788, "step": 7445 }, { "epoch": 1.43772929136899, "grad_norm": 1.0704681873321533, "learning_rate": 2.2205728405422715e-05, "loss": 0.6031, "step": 7446 }, { "epoch": 1.4379223788376134, "grad_norm": 0.8695350885391235, "learning_rate": 2.2191722920620283e-05, "loss": 0.5601, "step": 7447 }, { "epoch": 1.4381154663062368, "grad_norm": 1.1785004138946533, "learning_rate": 2.2177720594134148e-05, "loss": 0.5755, "step": 7448 }, { "epoch": 1.4383085537748599, "grad_norm": 1.6611396074295044, "learning_rate": 2.2163721427554673e-05, "loss": 0.5946, "step": 7449 }, { "epoch": 1.4385016412434832, "grad_norm": 0.7881184816360474, "learning_rate": 2.214972542247179e-05, "loss": 0.5431, "step": 7450 }, { "epoch": 1.4386947287121066, "grad_norm": 0.5873980522155762, "learning_rate": 2.213573258047507e-05, "loss": 0.606, "step": 7451 }, { "epoch": 1.43888781618073, "grad_norm": 1.2613213062286377, "learning_rate": 2.2121742903153793e-05, "loss": 0.6413, "step": 7452 }, { "epoch": 1.4390809036493533, "grad_norm": 0.7472714781761169, "learning_rate": 2.2107756392096808e-05, "loss": 0.5642, "step": 7453 }, { "epoch": 1.4392739911179764, "grad_norm": 0.8400783538818359, "learning_rate": 2.209377304889262e-05, "loss": 0.6019, "step": 7454 }, { "epoch": 1.4394670785865997, "grad_norm": 0.8314379453659058, "learning_rate": 2.2079792875129396e-05, "loss": 0.5816, "step": 7455 }, { "epoch": 1.439660166055223, "grad_norm": 1.18849515914917, "learning_rate": 2.2065815872394902e-05, "loss": 0.5731, "step": 7456 }, { "epoch": 1.4398532535238462, "grad_norm": 0.628116250038147, "learning_rate": 2.205184204227661e-05, "loss": 0.5952, "step": 7457 }, { "epoch": 1.4400463409924695, "grad_norm": 0.6346133351325989, "learning_rate": 2.2037871386361602e-05, "loss": 0.6247, "step": 7458 }, { "epoch": 1.4402394284610929, "grad_norm": 1.341391682624817, "learning_rate": 2.2023903906236524e-05, "loss": 0.5902, "step": 7459 }, { "epoch": 1.4404325159297162, "grad_norm": 0.7562714219093323, "learning_rate": 2.200993960348779e-05, "loss": 0.672, "step": 7460 }, { "epoch": 1.4406256033983396, "grad_norm": 0.9828067421913147, "learning_rate": 2.1995978479701372e-05, "loss": 0.6155, "step": 7461 }, { "epoch": 1.4408186908669627, "grad_norm": 0.6510661244392395, "learning_rate": 2.198202053646288e-05, "loss": 0.607, "step": 7462 }, { "epoch": 1.441011778335586, "grad_norm": 0.9753535985946655, "learning_rate": 2.1968065775357627e-05, "loss": 0.6073, "step": 7463 }, { "epoch": 1.4412048658042094, "grad_norm": 0.7204359173774719, "learning_rate": 2.1954114197970487e-05, "loss": 0.546, "step": 7464 }, { "epoch": 1.4413979532728325, "grad_norm": 0.5444801449775696, "learning_rate": 2.1940165805886026e-05, "loss": 0.5513, "step": 7465 }, { "epoch": 1.4415910407414558, "grad_norm": 2.8570711612701416, "learning_rate": 2.192622060068841e-05, "loss": 0.619, "step": 7466 }, { "epoch": 1.4417841282100792, "grad_norm": 0.8875670433044434, "learning_rate": 2.1912278583961455e-05, "loss": 0.6154, "step": 7467 }, { "epoch": 1.4419772156787025, "grad_norm": 0.8994126319885254, "learning_rate": 2.1898339757288648e-05, "loss": 0.6459, "step": 7468 }, { "epoch": 1.4421703031473259, "grad_norm": 0.7481526732444763, "learning_rate": 2.1884404122253076e-05, "loss": 0.6333, "step": 7469 }, { "epoch": 1.442363390615949, "grad_norm": 0.6122217178344727, "learning_rate": 2.187047168043747e-05, "loss": 0.5613, "step": 7470 }, { "epoch": 1.4425564780845723, "grad_norm": 1.0264220237731934, "learning_rate": 2.185654243342421e-05, "loss": 0.5989, "step": 7471 }, { "epoch": 1.4427495655531957, "grad_norm": 1.185511827468872, "learning_rate": 2.18426163827953e-05, "loss": 0.6143, "step": 7472 }, { "epoch": 1.4429426530218188, "grad_norm": 2.548253059387207, "learning_rate": 2.1828693530132366e-05, "loss": 0.6252, "step": 7473 }, { "epoch": 1.4431357404904421, "grad_norm": 1.7783249616622925, "learning_rate": 2.1814773877016737e-05, "loss": 0.5981, "step": 7474 }, { "epoch": 1.4433288279590655, "grad_norm": 0.6974955201148987, "learning_rate": 2.1800857425029304e-05, "loss": 0.6086, "step": 7475 }, { "epoch": 1.4435219154276888, "grad_norm": 1.0644837617874146, "learning_rate": 2.1786944175750636e-05, "loss": 0.6531, "step": 7476 }, { "epoch": 1.4437150028963122, "grad_norm": 0.7310886383056641, "learning_rate": 2.177303413076091e-05, "loss": 0.5961, "step": 7477 }, { "epoch": 1.4439080903649353, "grad_norm": 2.620816707611084, "learning_rate": 2.175912729163997e-05, "loss": 0.6568, "step": 7478 }, { "epoch": 1.4441011778335586, "grad_norm": 0.7938036918640137, "learning_rate": 2.1745223659967255e-05, "loss": 0.5745, "step": 7479 }, { "epoch": 1.444294265302182, "grad_norm": 0.8588255643844604, "learning_rate": 2.1731323237321904e-05, "loss": 0.6454, "step": 7480 }, { "epoch": 1.444487352770805, "grad_norm": 0.6049890518188477, "learning_rate": 2.1717426025282633e-05, "loss": 0.6083, "step": 7481 }, { "epoch": 1.4446804402394284, "grad_norm": 1.2794673442840576, "learning_rate": 2.1703532025427818e-05, "loss": 0.5995, "step": 7482 }, { "epoch": 1.4448735277080518, "grad_norm": 0.7523152232170105, "learning_rate": 2.1689641239335463e-05, "loss": 0.5795, "step": 7483 }, { "epoch": 1.4450666151766751, "grad_norm": 0.8973371386528015, "learning_rate": 2.167575366858319e-05, "loss": 0.6485, "step": 7484 }, { "epoch": 1.4452597026452982, "grad_norm": 1.0577094554901123, "learning_rate": 2.1661869314748318e-05, "loss": 0.5743, "step": 7485 }, { "epoch": 1.4454527901139216, "grad_norm": 0.7673895359039307, "learning_rate": 2.1647988179407728e-05, "loss": 0.6936, "step": 7486 }, { "epoch": 1.445645877582545, "grad_norm": 0.5361132621765137, "learning_rate": 2.1634110264137976e-05, "loss": 0.5168, "step": 7487 }, { "epoch": 1.445838965051168, "grad_norm": 0.7013556361198425, "learning_rate": 2.162023557051524e-05, "loss": 0.6243, "step": 7488 }, { "epoch": 1.4460320525197914, "grad_norm": 0.7817001938819885, "learning_rate": 2.160636410011533e-05, "loss": 0.6134, "step": 7489 }, { "epoch": 1.4462251399884147, "grad_norm": 0.8809354901313782, "learning_rate": 2.1592495854513678e-05, "loss": 0.5783, "step": 7490 }, { "epoch": 1.446418227457038, "grad_norm": 0.7451736330986023, "learning_rate": 2.1578630835285402e-05, "loss": 0.6359, "step": 7491 }, { "epoch": 1.4466113149256614, "grad_norm": 0.8747856020927429, "learning_rate": 2.15647690440052e-05, "loss": 0.677, "step": 7492 }, { "epoch": 1.4468044023942845, "grad_norm": 6.949148654937744, "learning_rate": 2.1550910482247422e-05, "loss": 0.6267, "step": 7493 }, { "epoch": 1.4469974898629079, "grad_norm": 1.0316174030303955, "learning_rate": 2.1537055151586044e-05, "loss": 0.5636, "step": 7494 }, { "epoch": 1.4471905773315312, "grad_norm": 0.7936286926269531, "learning_rate": 2.152320305359467e-05, "loss": 0.6331, "step": 7495 }, { "epoch": 1.4473836648001543, "grad_norm": 0.7992208003997803, "learning_rate": 2.150935418984658e-05, "loss": 0.5715, "step": 7496 }, { "epoch": 1.4475767522687777, "grad_norm": 0.8017997145652771, "learning_rate": 2.1495508561914633e-05, "loss": 0.5809, "step": 7497 }, { "epoch": 1.447769839737401, "grad_norm": 1.5067468881607056, "learning_rate": 2.1481666171371352e-05, "loss": 0.6157, "step": 7498 }, { "epoch": 1.4479629272060244, "grad_norm": 0.6624324917793274, "learning_rate": 2.1467827019788867e-05, "loss": 0.5648, "step": 7499 }, { "epoch": 1.4481560146746477, "grad_norm": 1.0832674503326416, "learning_rate": 2.145399110873897e-05, "loss": 0.6168, "step": 7500 }, { "epoch": 1.4481560146746477, "eval_loss": 0.657865583896637, "eval_runtime": 49.4607, "eval_samples_per_second": 13.425, "eval_steps_per_second": 0.425, "step": 7500 }, { "epoch": 1.4483491021432708, "grad_norm": 1.0912233591079712, "learning_rate": 2.1440158439793045e-05, "loss": 0.6843, "step": 7501 }, { "epoch": 1.4485421896118942, "grad_norm": 0.6395469307899475, "learning_rate": 2.1426329014522168e-05, "loss": 0.6329, "step": 7502 }, { "epoch": 1.4487352770805175, "grad_norm": 0.7669790983200073, "learning_rate": 2.1412502834496996e-05, "loss": 0.5881, "step": 7503 }, { "epoch": 1.4489283645491406, "grad_norm": 0.8467268943786621, "learning_rate": 2.139867990128783e-05, "loss": 0.6762, "step": 7504 }, { "epoch": 1.449121452017764, "grad_norm": 0.8750420808792114, "learning_rate": 2.1384860216464613e-05, "loss": 0.6005, "step": 7505 }, { "epoch": 1.4493145394863873, "grad_norm": 0.6489734649658203, "learning_rate": 2.1371043781596888e-05, "loss": 0.6789, "step": 7506 }, { "epoch": 1.4495076269550107, "grad_norm": 1.3939435482025146, "learning_rate": 2.135723059825389e-05, "loss": 0.6633, "step": 7507 }, { "epoch": 1.449700714423634, "grad_norm": 0.788981020450592, "learning_rate": 2.1343420668004427e-05, "loss": 0.6697, "step": 7508 }, { "epoch": 1.4498938018922571, "grad_norm": 1.4310667514801025, "learning_rate": 2.1329613992416936e-05, "loss": 0.6042, "step": 7509 }, { "epoch": 1.4500868893608805, "grad_norm": 0.6710561513900757, "learning_rate": 2.131581057305957e-05, "loss": 0.5337, "step": 7510 }, { "epoch": 1.4502799768295038, "grad_norm": 1.0055561065673828, "learning_rate": 2.1302010411499984e-05, "loss": 0.6621, "step": 7511 }, { "epoch": 1.450473064298127, "grad_norm": 0.9661880135536194, "learning_rate": 2.1288213509305532e-05, "loss": 0.5816, "step": 7512 }, { "epoch": 1.4506661517667503, "grad_norm": 0.8521879315376282, "learning_rate": 2.1274419868043226e-05, "loss": 0.5854, "step": 7513 }, { "epoch": 1.4508592392353736, "grad_norm": 1.0409458875656128, "learning_rate": 2.126062948927966e-05, "loss": 0.543, "step": 7514 }, { "epoch": 1.451052326703997, "grad_norm": 0.9837067723274231, "learning_rate": 2.1246842374581055e-05, "loss": 0.6178, "step": 7515 }, { "epoch": 1.4512454141726203, "grad_norm": 0.9885408878326416, "learning_rate": 2.123305852551333e-05, "loss": 0.6219, "step": 7516 }, { "epoch": 1.4514385016412434, "grad_norm": 1.234715461730957, "learning_rate": 2.1219277943641902e-05, "loss": 0.6436, "step": 7517 }, { "epoch": 1.4516315891098668, "grad_norm": 0.6316896080970764, "learning_rate": 2.1205500630531962e-05, "loss": 0.5169, "step": 7518 }, { "epoch": 1.4518246765784901, "grad_norm": 0.8566652536392212, "learning_rate": 2.119172658774824e-05, "loss": 0.5777, "step": 7519 }, { "epoch": 1.4520177640471132, "grad_norm": 1.2311023473739624, "learning_rate": 2.1177955816855115e-05, "loss": 0.6409, "step": 7520 }, { "epoch": 1.4522108515157366, "grad_norm": 0.7048625946044922, "learning_rate": 2.1164188319416613e-05, "loss": 0.6346, "step": 7521 }, { "epoch": 1.45240393898436, "grad_norm": 0.8149752616882324, "learning_rate": 2.115042409699635e-05, "loss": 0.6075, "step": 7522 }, { "epoch": 1.4525970264529833, "grad_norm": 0.9506551623344421, "learning_rate": 2.11366631511576e-05, "loss": 0.6337, "step": 7523 }, { "epoch": 1.4527901139216066, "grad_norm": 0.9534814357757568, "learning_rate": 2.1122905483463273e-05, "loss": 0.6564, "step": 7524 }, { "epoch": 1.4529832013902297, "grad_norm": 1.2403541803359985, "learning_rate": 2.1109151095475887e-05, "loss": 0.5657, "step": 7525 }, { "epoch": 1.453176288858853, "grad_norm": 1.758823037147522, "learning_rate": 2.1095399988757574e-05, "loss": 0.5624, "step": 7526 }, { "epoch": 1.4533693763274764, "grad_norm": 0.9202455878257751, "learning_rate": 2.108165216487016e-05, "loss": 0.5848, "step": 7527 }, { "epoch": 1.4535624637960995, "grad_norm": 0.767284631729126, "learning_rate": 2.1067907625374994e-05, "loss": 0.6572, "step": 7528 }, { "epoch": 1.453755551264723, "grad_norm": 1.3798338174819946, "learning_rate": 2.105416637183311e-05, "loss": 0.6208, "step": 7529 }, { "epoch": 1.4539486387333462, "grad_norm": 0.7347443103790283, "learning_rate": 2.1040428405805207e-05, "loss": 0.5916, "step": 7530 }, { "epoch": 1.4541417262019696, "grad_norm": 0.8923291563987732, "learning_rate": 2.102669372885155e-05, "loss": 0.5835, "step": 7531 }, { "epoch": 1.4543348136705927, "grad_norm": 0.63727205991745, "learning_rate": 2.1012962342532033e-05, "loss": 0.6031, "step": 7532 }, { "epoch": 1.454527901139216, "grad_norm": 0.6918020248413086, "learning_rate": 2.0999234248406246e-05, "loss": 0.6094, "step": 7533 }, { "epoch": 1.4547209886078394, "grad_norm": 0.8933965563774109, "learning_rate": 2.098550944803328e-05, "loss": 0.6092, "step": 7534 }, { "epoch": 1.4549140760764625, "grad_norm": 1.4078824520111084, "learning_rate": 2.0971787942971977e-05, "loss": 0.5888, "step": 7535 }, { "epoch": 1.4551071635450858, "grad_norm": 0.8558027148246765, "learning_rate": 2.0958069734780743e-05, "loss": 0.6118, "step": 7536 }, { "epoch": 1.4553002510137092, "grad_norm": 0.8003568649291992, "learning_rate": 2.09443548250176e-05, "loss": 0.6428, "step": 7537 }, { "epoch": 1.4554933384823325, "grad_norm": 0.9647578001022339, "learning_rate": 2.0930643215240257e-05, "loss": 0.5808, "step": 7538 }, { "epoch": 1.4556864259509559, "grad_norm": 2.3024063110351562, "learning_rate": 2.0916934907005962e-05, "loss": 0.5742, "step": 7539 }, { "epoch": 1.455879513419579, "grad_norm": 0.793627917766571, "learning_rate": 2.0903229901871628e-05, "loss": 0.6718, "step": 7540 }, { "epoch": 1.4560726008882023, "grad_norm": 0.8181706070899963, "learning_rate": 2.0889528201393837e-05, "loss": 0.5827, "step": 7541 }, { "epoch": 1.4562656883568257, "grad_norm": 0.6459331512451172, "learning_rate": 2.0875829807128737e-05, "loss": 0.5407, "step": 7542 }, { "epoch": 1.4564587758254488, "grad_norm": 0.8143730759620667, "learning_rate": 2.0862134720632092e-05, "loss": 0.6563, "step": 7543 }, { "epoch": 1.4566518632940721, "grad_norm": 0.6437447667121887, "learning_rate": 2.0848442943459385e-05, "loss": 0.6298, "step": 7544 }, { "epoch": 1.4568449507626955, "grad_norm": 1.1056386232376099, "learning_rate": 2.083475447716557e-05, "loss": 0.5437, "step": 7545 }, { "epoch": 1.4570380382313188, "grad_norm": 1.4723912477493286, "learning_rate": 2.082106932330537e-05, "loss": 0.6087, "step": 7546 }, { "epoch": 1.4572311256999422, "grad_norm": 0.6730429530143738, "learning_rate": 2.0807387483433055e-05, "loss": 0.5975, "step": 7547 }, { "epoch": 1.4574242131685653, "grad_norm": 0.6269766688346863, "learning_rate": 2.0793708959102513e-05, "loss": 0.6574, "step": 7548 }, { "epoch": 1.4576173006371886, "grad_norm": 1.6771376132965088, "learning_rate": 2.0780033751867323e-05, "loss": 0.6771, "step": 7549 }, { "epoch": 1.457810388105812, "grad_norm": 0.8703603744506836, "learning_rate": 2.0766361863280637e-05, "loss": 0.5975, "step": 7550 }, { "epoch": 1.458003475574435, "grad_norm": 0.7860947251319885, "learning_rate": 2.075269329489518e-05, "loss": 0.6418, "step": 7551 }, { "epoch": 1.4581965630430584, "grad_norm": 0.676468551158905, "learning_rate": 2.0739028048263415e-05, "loss": 0.6126, "step": 7552 }, { "epoch": 1.4583896505116818, "grad_norm": 3.0541224479675293, "learning_rate": 2.072536612493734e-05, "loss": 0.5922, "step": 7553 }, { "epoch": 1.4585827379803051, "grad_norm": 1.2797425985336304, "learning_rate": 2.0711707526468595e-05, "loss": 0.6562, "step": 7554 }, { "epoch": 1.4587758254489285, "grad_norm": 0.7022029161453247, "learning_rate": 2.0698052254408483e-05, "loss": 0.6003, "step": 7555 }, { "epoch": 1.4589689129175516, "grad_norm": 0.6634694933891296, "learning_rate": 2.068440031030788e-05, "loss": 0.5888, "step": 7556 }, { "epoch": 1.459162000386175, "grad_norm": 2.4340834617614746, "learning_rate": 2.06707516957173e-05, "loss": 0.6244, "step": 7557 }, { "epoch": 1.4593550878547983, "grad_norm": 0.8417605757713318, "learning_rate": 2.065710641218688e-05, "loss": 0.593, "step": 7558 }, { "epoch": 1.4595481753234214, "grad_norm": 0.8082351088523865, "learning_rate": 2.0643464461266365e-05, "loss": 0.6433, "step": 7559 }, { "epoch": 1.4597412627920447, "grad_norm": 1.460741639137268, "learning_rate": 2.062982584450517e-05, "loss": 0.6321, "step": 7560 }, { "epoch": 1.459934350260668, "grad_norm": 0.7190249562263489, "learning_rate": 2.0616190563452293e-05, "loss": 0.6473, "step": 7561 }, { "epoch": 1.4601274377292914, "grad_norm": 0.8322178721427917, "learning_rate": 2.0602558619656304e-05, "loss": 0.5581, "step": 7562 }, { "epoch": 1.4603205251979148, "grad_norm": 3.8603503704071045, "learning_rate": 2.0588930014665502e-05, "loss": 0.5366, "step": 7563 }, { "epoch": 1.460513612666538, "grad_norm": 1.058937430381775, "learning_rate": 2.0575304750027736e-05, "loss": 0.6025, "step": 7564 }, { "epoch": 1.4607067001351612, "grad_norm": 0.7431814670562744, "learning_rate": 2.0561682827290463e-05, "loss": 0.6114, "step": 7565 }, { "epoch": 1.4608997876037846, "grad_norm": 0.7775586247444153, "learning_rate": 2.0548064248000837e-05, "loss": 0.6211, "step": 7566 }, { "epoch": 1.4610928750724077, "grad_norm": 0.7694734930992126, "learning_rate": 2.0534449013705574e-05, "loss": 0.6438, "step": 7567 }, { "epoch": 1.461285962541031, "grad_norm": 1.4164105653762817, "learning_rate": 2.052083712595098e-05, "loss": 0.613, "step": 7568 }, { "epoch": 1.4614790500096544, "grad_norm": 0.680582582950592, "learning_rate": 2.0507228586283056e-05, "loss": 0.5672, "step": 7569 }, { "epoch": 1.4616721374782777, "grad_norm": 1.0253636837005615, "learning_rate": 2.0493623396247386e-05, "loss": 0.6254, "step": 7570 }, { "epoch": 1.461865224946901, "grad_norm": 0.9639779329299927, "learning_rate": 2.0480021557389144e-05, "loss": 0.5763, "step": 7571 }, { "epoch": 1.4620583124155242, "grad_norm": 1.7410675287246704, "learning_rate": 2.0466423071253205e-05, "loss": 0.6076, "step": 7572 }, { "epoch": 1.4622513998841475, "grad_norm": 0.9355152249336243, "learning_rate": 2.045282793938398e-05, "loss": 0.5728, "step": 7573 }, { "epoch": 1.4624444873527709, "grad_norm": 0.7393012046813965, "learning_rate": 2.043923616332554e-05, "loss": 0.5321, "step": 7574 }, { "epoch": 1.462637574821394, "grad_norm": 1.6499944925308228, "learning_rate": 2.0425647744621566e-05, "loss": 0.659, "step": 7575 }, { "epoch": 1.4628306622900173, "grad_norm": 0.7106432914733887, "learning_rate": 2.0412062684815343e-05, "loss": 0.6034, "step": 7576 }, { "epoch": 1.4630237497586407, "grad_norm": 1.1482173204421997, "learning_rate": 2.0398480985449813e-05, "loss": 0.5807, "step": 7577 }, { "epoch": 1.463216837227264, "grad_norm": 0.7239591479301453, "learning_rate": 2.038490264806751e-05, "loss": 0.6368, "step": 7578 }, { "epoch": 1.4634099246958872, "grad_norm": 1.1856019496917725, "learning_rate": 2.037132767421059e-05, "loss": 0.6512, "step": 7579 }, { "epoch": 1.4636030121645105, "grad_norm": 0.9351349472999573, "learning_rate": 2.0357756065420813e-05, "loss": 0.6378, "step": 7580 }, { "epoch": 1.4637960996331338, "grad_norm": 0.6513811945915222, "learning_rate": 2.034418782323958e-05, "loss": 0.5845, "step": 7581 }, { "epoch": 1.463989187101757, "grad_norm": 0.643560528755188, "learning_rate": 2.0330622949207877e-05, "loss": 0.6161, "step": 7582 }, { "epoch": 1.4641822745703803, "grad_norm": 1.6937077045440674, "learning_rate": 2.031706144486637e-05, "loss": 0.5535, "step": 7583 }, { "epoch": 1.4643753620390036, "grad_norm": 0.7436890006065369, "learning_rate": 2.0303503311755285e-05, "loss": 0.583, "step": 7584 }, { "epoch": 1.464568449507627, "grad_norm": 1.6064976453781128, "learning_rate": 2.0289948551414483e-05, "loss": 0.6616, "step": 7585 }, { "epoch": 1.4647615369762503, "grad_norm": 0.755965530872345, "learning_rate": 2.0276397165383437e-05, "loss": 0.537, "step": 7586 }, { "epoch": 1.4649546244448735, "grad_norm": 0.7035666704177856, "learning_rate": 2.0262849155201226e-05, "loss": 0.6012, "step": 7587 }, { "epoch": 1.4651477119134968, "grad_norm": 1.0049515962600708, "learning_rate": 2.0249304522406598e-05, "loss": 0.6036, "step": 7588 }, { "epoch": 1.4653407993821201, "grad_norm": 1.1502602100372314, "learning_rate": 2.023576326853786e-05, "loss": 0.6264, "step": 7589 }, { "epoch": 1.4655338868507433, "grad_norm": 1.1017570495605469, "learning_rate": 2.022222539513296e-05, "loss": 0.6234, "step": 7590 }, { "epoch": 1.4657269743193666, "grad_norm": 0.961283802986145, "learning_rate": 2.0208690903729454e-05, "loss": 0.6712, "step": 7591 }, { "epoch": 1.46592006178799, "grad_norm": 1.1196227073669434, "learning_rate": 2.0195159795864514e-05, "loss": 0.5419, "step": 7592 }, { "epoch": 1.4661131492566133, "grad_norm": 0.7973318099975586, "learning_rate": 2.0181632073074926e-05, "loss": 0.6362, "step": 7593 }, { "epoch": 1.4663062367252366, "grad_norm": 0.9557600617408752, "learning_rate": 2.0168107736897122e-05, "loss": 0.5452, "step": 7594 }, { "epoch": 1.4664993241938598, "grad_norm": 0.7932430505752563, "learning_rate": 2.0154586788867107e-05, "loss": 0.585, "step": 7595 }, { "epoch": 1.466692411662483, "grad_norm": 0.5688921213150024, "learning_rate": 2.014106923052052e-05, "loss": 0.5821, "step": 7596 }, { "epoch": 1.4668854991311064, "grad_norm": 3.4917373657226562, "learning_rate": 2.0127555063392617e-05, "loss": 0.6156, "step": 7597 }, { "epoch": 1.4670785865997296, "grad_norm": 1.115228533744812, "learning_rate": 2.0114044289018248e-05, "loss": 0.6525, "step": 7598 }, { "epoch": 1.467271674068353, "grad_norm": 0.7052868008613586, "learning_rate": 2.0100536908931916e-05, "loss": 0.5791, "step": 7599 }, { "epoch": 1.4674647615369762, "grad_norm": 0.5685085654258728, "learning_rate": 2.0087032924667722e-05, "loss": 0.6598, "step": 7600 }, { "epoch": 1.4676578490055996, "grad_norm": 1.7036482095718384, "learning_rate": 2.0073532337759356e-05, "loss": 0.5999, "step": 7601 }, { "epoch": 1.467850936474223, "grad_norm": 0.6933913230895996, "learning_rate": 2.0060035149740158e-05, "loss": 0.5787, "step": 7602 }, { "epoch": 1.468044023942846, "grad_norm": 0.7832648158073425, "learning_rate": 2.0046541362143056e-05, "loss": 0.5966, "step": 7603 }, { "epoch": 1.4682371114114694, "grad_norm": 1.1907753944396973, "learning_rate": 2.0033050976500594e-05, "loss": 0.612, "step": 7604 }, { "epoch": 1.4684301988800927, "grad_norm": 1.1214447021484375, "learning_rate": 2.001956399434497e-05, "loss": 0.5314, "step": 7605 }, { "epoch": 1.4686232863487159, "grad_norm": 1.5934858322143555, "learning_rate": 2.0006080417207945e-05, "loss": 0.6756, "step": 7606 }, { "epoch": 1.4688163738173392, "grad_norm": 2.0165257453918457, "learning_rate": 1.9992600246620912e-05, "loss": 0.6391, "step": 7607 }, { "epoch": 1.4690094612859625, "grad_norm": 0.7123590707778931, "learning_rate": 1.997912348411488e-05, "loss": 0.6667, "step": 7608 }, { "epoch": 1.469202548754586, "grad_norm": 0.9856369495391846, "learning_rate": 1.9965650131220465e-05, "loss": 0.6098, "step": 7609 }, { "epoch": 1.4693956362232092, "grad_norm": 0.7483574151992798, "learning_rate": 1.9952180189467884e-05, "loss": 0.5903, "step": 7610 }, { "epoch": 1.4695887236918324, "grad_norm": 1.1411768198013306, "learning_rate": 1.9938713660387016e-05, "loss": 0.589, "step": 7611 }, { "epoch": 1.4697818111604557, "grad_norm": 0.9072834253311157, "learning_rate": 1.9925250545507294e-05, "loss": 0.626, "step": 7612 }, { "epoch": 1.469974898629079, "grad_norm": 1.1118675470352173, "learning_rate": 1.9911790846357798e-05, "loss": 0.6573, "step": 7613 }, { "epoch": 1.4701679860977022, "grad_norm": 0.7415522336959839, "learning_rate": 1.9898334564467196e-05, "loss": 0.5723, "step": 7614 }, { "epoch": 1.4703610735663255, "grad_norm": 0.7620511651039124, "learning_rate": 1.9884881701363776e-05, "loss": 0.5621, "step": 7615 }, { "epoch": 1.4705541610349488, "grad_norm": 0.7772712707519531, "learning_rate": 1.9871432258575474e-05, "loss": 0.5371, "step": 7616 }, { "epoch": 1.4707472485035722, "grad_norm": 0.7326335310935974, "learning_rate": 1.9857986237629784e-05, "loss": 0.6017, "step": 7617 }, { "epoch": 1.4709403359721955, "grad_norm": 1.8565049171447754, "learning_rate": 1.984454364005382e-05, "loss": 0.6392, "step": 7618 }, { "epoch": 1.4711334234408187, "grad_norm": 0.942661464214325, "learning_rate": 1.9831104467374374e-05, "loss": 0.5612, "step": 7619 }, { "epoch": 1.471326510909442, "grad_norm": 1.9828441143035889, "learning_rate": 1.9817668721117743e-05, "loss": 0.595, "step": 7620 }, { "epoch": 1.4715195983780653, "grad_norm": 0.703860878944397, "learning_rate": 1.9804236402809884e-05, "loss": 0.6059, "step": 7621 }, { "epoch": 1.4717126858466885, "grad_norm": 0.90121990442276, "learning_rate": 1.9790807513976407e-05, "loss": 0.593, "step": 7622 }, { "epoch": 1.4719057733153118, "grad_norm": 0.8056994676589966, "learning_rate": 1.9777382056142474e-05, "loss": 0.6658, "step": 7623 }, { "epoch": 1.4720988607839351, "grad_norm": 0.8668590188026428, "learning_rate": 1.976396003083288e-05, "loss": 0.6483, "step": 7624 }, { "epoch": 1.4722919482525585, "grad_norm": 0.7113983035087585, "learning_rate": 1.9750541439572023e-05, "loss": 0.592, "step": 7625 }, { "epoch": 1.4724850357211816, "grad_norm": 0.8323966860771179, "learning_rate": 1.9737126283883904e-05, "loss": 0.5483, "step": 7626 }, { "epoch": 1.472678123189805, "grad_norm": 0.8882771134376526, "learning_rate": 1.9723714565292178e-05, "loss": 0.5684, "step": 7627 }, { "epoch": 1.4728712106584283, "grad_norm": 1.3552470207214355, "learning_rate": 1.9710306285320053e-05, "loss": 0.5861, "step": 7628 }, { "epoch": 1.4730642981270514, "grad_norm": 1.2400879859924316, "learning_rate": 1.9696901445490362e-05, "loss": 0.6084, "step": 7629 }, { "epoch": 1.4732573855956748, "grad_norm": 0.7703557014465332, "learning_rate": 1.96835000473256e-05, "loss": 0.615, "step": 7630 }, { "epoch": 1.473450473064298, "grad_norm": 1.1265411376953125, "learning_rate": 1.967010209234778e-05, "loss": 0.606, "step": 7631 }, { "epoch": 1.4736435605329214, "grad_norm": 0.8603948354721069, "learning_rate": 1.9656707582078565e-05, "loss": 0.5997, "step": 7632 }, { "epoch": 1.4738366480015448, "grad_norm": 0.6453964710235596, "learning_rate": 1.964331651803927e-05, "loss": 0.6112, "step": 7633 }, { "epoch": 1.474029735470168, "grad_norm": 0.8563664555549622, "learning_rate": 1.962992890175077e-05, "loss": 0.628, "step": 7634 }, { "epoch": 1.4742228229387913, "grad_norm": 0.8700339794158936, "learning_rate": 1.9616544734733534e-05, "loss": 0.6498, "step": 7635 }, { "epoch": 1.4744159104074146, "grad_norm": 1.1278752088546753, "learning_rate": 1.9603164018507718e-05, "loss": 0.5696, "step": 7636 }, { "epoch": 1.4746089978760377, "grad_norm": 0.7954839468002319, "learning_rate": 1.958978675459296e-05, "loss": 0.6599, "step": 7637 }, { "epoch": 1.474802085344661, "grad_norm": 0.9367102384567261, "learning_rate": 1.957641294450864e-05, "loss": 0.6239, "step": 7638 }, { "epoch": 1.4749951728132844, "grad_norm": 0.7267718315124512, "learning_rate": 1.9563042589773656e-05, "loss": 0.6125, "step": 7639 }, { "epoch": 1.4751882602819077, "grad_norm": 0.6547189950942993, "learning_rate": 1.9549675691906532e-05, "loss": 0.6603, "step": 7640 }, { "epoch": 1.475381347750531, "grad_norm": 0.7339242100715637, "learning_rate": 1.953631225242546e-05, "loss": 0.6367, "step": 7641 }, { "epoch": 1.4755744352191542, "grad_norm": 0.8582415580749512, "learning_rate": 1.952295227284813e-05, "loss": 0.6047, "step": 7642 }, { "epoch": 1.4757675226877776, "grad_norm": 0.6439670324325562, "learning_rate": 1.9509595754691906e-05, "loss": 0.5853, "step": 7643 }, { "epoch": 1.475960610156401, "grad_norm": 1.196192741394043, "learning_rate": 1.9496242699473783e-05, "loss": 0.6215, "step": 7644 }, { "epoch": 1.476153697625024, "grad_norm": 3.070265531539917, "learning_rate": 1.9482893108710316e-05, "loss": 0.7003, "step": 7645 }, { "epoch": 1.4763467850936474, "grad_norm": 1.6491619348526, "learning_rate": 1.9469546983917652e-05, "loss": 0.5912, "step": 7646 }, { "epoch": 1.4765398725622707, "grad_norm": 0.8237200975418091, "learning_rate": 1.945620432661163e-05, "loss": 0.6422, "step": 7647 }, { "epoch": 1.476732960030894, "grad_norm": 0.8499940037727356, "learning_rate": 1.9442865138307576e-05, "loss": 0.5897, "step": 7648 }, { "epoch": 1.4769260474995174, "grad_norm": 1.0257720947265625, "learning_rate": 1.9429529420520532e-05, "loss": 0.5627, "step": 7649 }, { "epoch": 1.4771191349681405, "grad_norm": 0.7129989862442017, "learning_rate": 1.9416197174765073e-05, "loss": 0.5976, "step": 7650 }, { "epoch": 1.4773122224367639, "grad_norm": 0.7864265441894531, "learning_rate": 1.9402868402555413e-05, "loss": 0.6032, "step": 7651 }, { "epoch": 1.4775053099053872, "grad_norm": 0.7338788509368896, "learning_rate": 1.9389543105405345e-05, "loss": 0.5753, "step": 7652 }, { "epoch": 1.4776983973740103, "grad_norm": 1.1945282220840454, "learning_rate": 1.9376221284828337e-05, "loss": 0.6548, "step": 7653 }, { "epoch": 1.4778914848426337, "grad_norm": 6.5894389152526855, "learning_rate": 1.9362902942337336e-05, "loss": 0.5775, "step": 7654 }, { "epoch": 1.478084572311257, "grad_norm": 1.231005072593689, "learning_rate": 1.9349588079445032e-05, "loss": 0.6043, "step": 7655 }, { "epoch": 1.4782776597798803, "grad_norm": 0.6856643557548523, "learning_rate": 1.9336276697663624e-05, "loss": 0.5934, "step": 7656 }, { "epoch": 1.4784707472485037, "grad_norm": 0.7774437069892883, "learning_rate": 1.9322968798504943e-05, "loss": 0.5845, "step": 7657 }, { "epoch": 1.4786638347171268, "grad_norm": 1.4465595483779907, "learning_rate": 1.930966438348046e-05, "loss": 0.5877, "step": 7658 }, { "epoch": 1.4788569221857502, "grad_norm": 0.7201066017150879, "learning_rate": 1.929636345410122e-05, "loss": 0.6455, "step": 7659 }, { "epoch": 1.4790500096543735, "grad_norm": 0.7345449328422546, "learning_rate": 1.928306601187782e-05, "loss": 0.5964, "step": 7660 }, { "epoch": 1.4792430971229966, "grad_norm": 0.7405939698219299, "learning_rate": 1.9269772058320563e-05, "loss": 0.6159, "step": 7661 }, { "epoch": 1.47943618459162, "grad_norm": 0.7147534489631653, "learning_rate": 1.9256481594939285e-05, "loss": 0.5943, "step": 7662 }, { "epoch": 1.4796292720602433, "grad_norm": 0.6774410009384155, "learning_rate": 1.9243194623243437e-05, "loss": 0.587, "step": 7663 }, { "epoch": 1.4798223595288666, "grad_norm": 0.7716153264045715, "learning_rate": 1.9229911144742136e-05, "loss": 0.6484, "step": 7664 }, { "epoch": 1.48001544699749, "grad_norm": 1.353955864906311, "learning_rate": 1.9216631160943964e-05, "loss": 0.5344, "step": 7665 }, { "epoch": 1.4802085344661131, "grad_norm": 0.7175076007843018, "learning_rate": 1.920335467335726e-05, "loss": 0.5823, "step": 7666 }, { "epoch": 1.4804016219347365, "grad_norm": 4.070765972137451, "learning_rate": 1.9190081683489875e-05, "loss": 0.6519, "step": 7667 }, { "epoch": 1.4805947094033598, "grad_norm": 0.7862683534622192, "learning_rate": 1.9176812192849265e-05, "loss": 0.6116, "step": 7668 }, { "epoch": 1.480787796871983, "grad_norm": 0.6585474610328674, "learning_rate": 1.916354620294254e-05, "loss": 0.5532, "step": 7669 }, { "epoch": 1.4809808843406063, "grad_norm": 0.792079508304596, "learning_rate": 1.9150283715276385e-05, "loss": 0.6006, "step": 7670 }, { "epoch": 1.4811739718092296, "grad_norm": 0.6103190779685974, "learning_rate": 1.9137024731357028e-05, "loss": 0.5911, "step": 7671 }, { "epoch": 1.481367059277853, "grad_norm": 1.4096994400024414, "learning_rate": 1.912376925269041e-05, "loss": 0.6858, "step": 7672 }, { "epoch": 1.4815601467464763, "grad_norm": 1.3481844663619995, "learning_rate": 1.9110517280782e-05, "loss": 0.5756, "step": 7673 }, { "epoch": 1.4817532342150994, "grad_norm": 0.8612849116325378, "learning_rate": 1.9097268817136865e-05, "loss": 0.5837, "step": 7674 }, { "epoch": 1.4819463216837228, "grad_norm": 0.5804440379142761, "learning_rate": 1.9084023863259732e-05, "loss": 0.5775, "step": 7675 }, { "epoch": 1.4821394091523459, "grad_norm": 0.6456375122070312, "learning_rate": 1.9070782420654876e-05, "loss": 0.5483, "step": 7676 }, { "epoch": 1.4823324966209692, "grad_norm": 0.890791118144989, "learning_rate": 1.905754449082619e-05, "loss": 0.6492, "step": 7677 }, { "epoch": 1.4825255840895926, "grad_norm": 2.6042277812957764, "learning_rate": 1.9044310075277167e-05, "loss": 0.6153, "step": 7678 }, { "epoch": 1.482718671558216, "grad_norm": 1.233593463897705, "learning_rate": 1.903107917551088e-05, "loss": 0.6488, "step": 7679 }, { "epoch": 1.4829117590268392, "grad_norm": 0.8746258020401001, "learning_rate": 1.9017851793030066e-05, "loss": 0.6047, "step": 7680 }, { "epoch": 1.4831048464954624, "grad_norm": 1.0097733736038208, "learning_rate": 1.9004627929337006e-05, "loss": 0.6055, "step": 7681 }, { "epoch": 1.4832979339640857, "grad_norm": 0.9419726729393005, "learning_rate": 1.899140758593358e-05, "loss": 0.5289, "step": 7682 }, { "epoch": 1.483491021432709, "grad_norm": 0.9390904307365417, "learning_rate": 1.89781907643213e-05, "loss": 0.6276, "step": 7683 }, { "epoch": 1.4836841089013322, "grad_norm": 0.6729966402053833, "learning_rate": 1.896497746600126e-05, "loss": 0.6033, "step": 7684 }, { "epoch": 1.4838771963699555, "grad_norm": 7.2639546394348145, "learning_rate": 1.895176769247413e-05, "loss": 0.5329, "step": 7685 }, { "epoch": 1.4840702838385789, "grad_norm": 1.181969404220581, "learning_rate": 1.8938561445240255e-05, "loss": 0.6674, "step": 7686 }, { "epoch": 1.4842633713072022, "grad_norm": 1.1941674947738647, "learning_rate": 1.89253587257995e-05, "loss": 0.6236, "step": 7687 }, { "epoch": 1.4844564587758255, "grad_norm": 0.8734825849533081, "learning_rate": 1.8912159535651363e-05, "loss": 0.5831, "step": 7688 }, { "epoch": 1.4846495462444487, "grad_norm": 1.2057255506515503, "learning_rate": 1.889896387629494e-05, "loss": 0.6535, "step": 7689 }, { "epoch": 1.484842633713072, "grad_norm": 1.345884919166565, "learning_rate": 1.8885771749228925e-05, "loss": 0.6401, "step": 7690 }, { "epoch": 1.4850357211816954, "grad_norm": 0.7002879977226257, "learning_rate": 1.887258315595159e-05, "loss": 0.5782, "step": 7691 }, { "epoch": 1.4852288086503185, "grad_norm": 0.7120988965034485, "learning_rate": 1.8859398097960864e-05, "loss": 0.6255, "step": 7692 }, { "epoch": 1.4854218961189418, "grad_norm": 0.8263649940490723, "learning_rate": 1.884621657675421e-05, "loss": 0.6358, "step": 7693 }, { "epoch": 1.4856149835875652, "grad_norm": 0.8165671229362488, "learning_rate": 1.8833038593828722e-05, "loss": 0.5484, "step": 7694 }, { "epoch": 1.4858080710561885, "grad_norm": 0.7796449065208435, "learning_rate": 1.8819864150681084e-05, "loss": 0.5571, "step": 7695 }, { "epoch": 1.4860011585248118, "grad_norm": 1.1193876266479492, "learning_rate": 1.880669324880756e-05, "loss": 0.5458, "step": 7696 }, { "epoch": 1.486194245993435, "grad_norm": 0.6652802228927612, "learning_rate": 1.8793525889704066e-05, "loss": 0.5335, "step": 7697 }, { "epoch": 1.4863873334620583, "grad_norm": 1.060086965560913, "learning_rate": 1.878036207486607e-05, "loss": 0.5714, "step": 7698 }, { "epoch": 1.4865804209306817, "grad_norm": 2.502399444580078, "learning_rate": 1.8767201805788638e-05, "loss": 0.5951, "step": 7699 }, { "epoch": 1.4867735083993048, "grad_norm": 0.7475535273551941, "learning_rate": 1.8754045083966447e-05, "loss": 0.511, "step": 7700 }, { "epoch": 1.4869665958679281, "grad_norm": 0.7661181688308716, "learning_rate": 1.874089191089377e-05, "loss": 0.6218, "step": 7701 }, { "epoch": 1.4871596833365515, "grad_norm": 1.158313512802124, "learning_rate": 1.872774228806446e-05, "loss": 0.6099, "step": 7702 }, { "epoch": 1.4873527708051748, "grad_norm": 0.6338856220245361, "learning_rate": 1.8714596216972007e-05, "loss": 0.5898, "step": 7703 }, { "epoch": 1.4875458582737981, "grad_norm": 0.7864298224449158, "learning_rate": 1.870145369910946e-05, "loss": 0.5881, "step": 7704 }, { "epoch": 1.4877389457424213, "grad_norm": 1.1127315759658813, "learning_rate": 1.8688314735969476e-05, "loss": 0.599, "step": 7705 }, { "epoch": 1.4879320332110446, "grad_norm": 1.2791659832000732, "learning_rate": 1.8675179329044307e-05, "loss": 0.5511, "step": 7706 }, { "epoch": 1.488125120679668, "grad_norm": 1.0307267904281616, "learning_rate": 1.8662047479825788e-05, "loss": 0.5661, "step": 7707 }, { "epoch": 1.488318208148291, "grad_norm": 3.543966770172119, "learning_rate": 1.8648919189805398e-05, "loss": 0.6582, "step": 7708 }, { "epoch": 1.4885112956169144, "grad_norm": 1.2499053478240967, "learning_rate": 1.863579446047416e-05, "loss": 0.5685, "step": 7709 }, { "epoch": 1.4887043830855378, "grad_norm": 0.7096170783042908, "learning_rate": 1.8622673293322712e-05, "loss": 0.6097, "step": 7710 }, { "epoch": 1.488897470554161, "grad_norm": 0.7171360850334167, "learning_rate": 1.8609555689841283e-05, "loss": 0.5912, "step": 7711 }, { "epoch": 1.4890905580227844, "grad_norm": 0.8653685450553894, "learning_rate": 1.8596441651519714e-05, "loss": 0.6188, "step": 7712 }, { "epoch": 1.4892836454914076, "grad_norm": 0.8986153602600098, "learning_rate": 1.8583331179847392e-05, "loss": 0.5685, "step": 7713 }, { "epoch": 1.489476732960031, "grad_norm": 0.8980768322944641, "learning_rate": 1.8570224276313382e-05, "loss": 0.5645, "step": 7714 }, { "epoch": 1.4896698204286543, "grad_norm": 0.78794926404953, "learning_rate": 1.855712094240627e-05, "loss": 0.6096, "step": 7715 }, { "epoch": 1.4898629078972774, "grad_norm": 0.7034898400306702, "learning_rate": 1.8544021179614273e-05, "loss": 0.6361, "step": 7716 }, { "epoch": 1.4900559953659007, "grad_norm": 0.6777330040931702, "learning_rate": 1.8530924989425184e-05, "loss": 0.543, "step": 7717 }, { "epoch": 1.490249082834524, "grad_norm": 1.092397928237915, "learning_rate": 1.851783237332639e-05, "loss": 0.6258, "step": 7718 }, { "epoch": 1.4904421703031474, "grad_norm": 5.633089065551758, "learning_rate": 1.8504743332804907e-05, "loss": 0.6264, "step": 7719 }, { "epoch": 1.4906352577717707, "grad_norm": 2.212676763534546, "learning_rate": 1.8491657869347302e-05, "loss": 0.6302, "step": 7720 }, { "epoch": 1.4908283452403939, "grad_norm": 0.8356274962425232, "learning_rate": 1.8478575984439744e-05, "loss": 0.5775, "step": 7721 }, { "epoch": 1.4910214327090172, "grad_norm": 0.9808542132377625, "learning_rate": 1.8465497679568045e-05, "loss": 0.589, "step": 7722 }, { "epoch": 1.4912145201776403, "grad_norm": 0.9998731017112732, "learning_rate": 1.8452422956217524e-05, "loss": 0.6274, "step": 7723 }, { "epoch": 1.4914076076462637, "grad_norm": 1.2260491847991943, "learning_rate": 1.8439351815873134e-05, "loss": 0.5973, "step": 7724 }, { "epoch": 1.491600695114887, "grad_norm": 0.6268200278282166, "learning_rate": 1.8426284260019467e-05, "loss": 0.615, "step": 7725 }, { "epoch": 1.4917937825835104, "grad_norm": 1.0896437168121338, "learning_rate": 1.8413220290140643e-05, "loss": 0.6036, "step": 7726 }, { "epoch": 1.4919868700521337, "grad_norm": 2.472799777984619, "learning_rate": 1.84001599077204e-05, "loss": 0.5573, "step": 7727 }, { "epoch": 1.4921799575207568, "grad_norm": 1.0646028518676758, "learning_rate": 1.8387103114242065e-05, "loss": 0.6643, "step": 7728 }, { "epoch": 1.4923730449893802, "grad_norm": 5.014395713806152, "learning_rate": 1.8374049911188545e-05, "loss": 0.6072, "step": 7729 }, { "epoch": 1.4925661324580035, "grad_norm": 2.7356369495391846, "learning_rate": 1.8361000300042393e-05, "loss": 0.6065, "step": 7730 }, { "epoch": 1.4927592199266266, "grad_norm": 0.7209846377372742, "learning_rate": 1.8347954282285685e-05, "loss": 0.5674, "step": 7731 }, { "epoch": 1.49295230739525, "grad_norm": 1.1113563776016235, "learning_rate": 1.833491185940013e-05, "loss": 0.6454, "step": 7732 }, { "epoch": 1.4931453948638733, "grad_norm": 2.5487260818481445, "learning_rate": 1.8321873032867004e-05, "loss": 0.6009, "step": 7733 }, { "epoch": 1.4933384823324967, "grad_norm": 1.0753196477890015, "learning_rate": 1.83088378041672e-05, "loss": 0.5651, "step": 7734 }, { "epoch": 1.49353156980112, "grad_norm": 0.7135442495346069, "learning_rate": 1.829580617478117e-05, "loss": 0.5958, "step": 7735 }, { "epoch": 1.4937246572697431, "grad_norm": 0.8052011728286743, "learning_rate": 1.828277814618901e-05, "loss": 0.511, "step": 7736 }, { "epoch": 1.4939177447383665, "grad_norm": 0.8252800107002258, "learning_rate": 1.8269753719870358e-05, "loss": 0.5614, "step": 7737 }, { "epoch": 1.4941108322069898, "grad_norm": 1.420206904411316, "learning_rate": 1.8256732897304436e-05, "loss": 0.6574, "step": 7738 }, { "epoch": 1.494303919675613, "grad_norm": 0.7082727551460266, "learning_rate": 1.8243715679970143e-05, "loss": 0.6073, "step": 7739 }, { "epoch": 1.4944970071442363, "grad_norm": 1.3265951871871948, "learning_rate": 1.823070206934584e-05, "loss": 0.5927, "step": 7740 }, { "epoch": 1.4946900946128596, "grad_norm": 0.9078068137168884, "learning_rate": 1.8217692066909553e-05, "loss": 0.5723, "step": 7741 }, { "epoch": 1.494883182081483, "grad_norm": 0.9185839891433716, "learning_rate": 1.820468567413892e-05, "loss": 0.5879, "step": 7742 }, { "epoch": 1.4950762695501063, "grad_norm": 0.9399170279502869, "learning_rate": 1.8191682892511123e-05, "loss": 0.6011, "step": 7743 }, { "epoch": 1.4952693570187294, "grad_norm": 0.7847424149513245, "learning_rate": 1.8178683723502932e-05, "loss": 0.5569, "step": 7744 }, { "epoch": 1.4954624444873528, "grad_norm": 0.7087743878364563, "learning_rate": 1.8165688168590773e-05, "loss": 0.6143, "step": 7745 }, { "epoch": 1.4956555319559761, "grad_norm": 0.7954963445663452, "learning_rate": 1.8152696229250544e-05, "loss": 0.6538, "step": 7746 }, { "epoch": 1.4958486194245992, "grad_norm": 1.0601706504821777, "learning_rate": 1.8139707906957853e-05, "loss": 0.5509, "step": 7747 }, { "epoch": 1.4960417068932226, "grad_norm": 0.5680239200592041, "learning_rate": 1.812672320318783e-05, "loss": 0.6141, "step": 7748 }, { "epoch": 1.496234794361846, "grad_norm": 1.1680151224136353, "learning_rate": 1.8113742119415176e-05, "loss": 0.6724, "step": 7749 }, { "epoch": 1.4964278818304693, "grad_norm": 0.6296737790107727, "learning_rate": 1.810076465711429e-05, "loss": 0.5787, "step": 7750 }, { "epoch": 1.4966209692990926, "grad_norm": 1.3534537553787231, "learning_rate": 1.808779081775901e-05, "loss": 0.5507, "step": 7751 }, { "epoch": 1.4968140567677157, "grad_norm": 1.6378732919692993, "learning_rate": 1.8074820602822852e-05, "loss": 0.5923, "step": 7752 }, { "epoch": 1.497007144236339, "grad_norm": 0.8552410006523132, "learning_rate": 1.8061854013778924e-05, "loss": 0.6067, "step": 7753 }, { "epoch": 1.4972002317049624, "grad_norm": 0.7096274495124817, "learning_rate": 1.8048891052099893e-05, "loss": 0.6102, "step": 7754 }, { "epoch": 1.4973933191735855, "grad_norm": 1.099435806274414, "learning_rate": 1.8035931719258003e-05, "loss": 0.6668, "step": 7755 }, { "epoch": 1.4975864066422089, "grad_norm": 1.0226577520370483, "learning_rate": 1.802297601672516e-05, "loss": 0.5994, "step": 7756 }, { "epoch": 1.4977794941108322, "grad_norm": 0.9224835634231567, "learning_rate": 1.8010023945972743e-05, "loss": 0.5902, "step": 7757 }, { "epoch": 1.4979725815794556, "grad_norm": 0.7300437688827515, "learning_rate": 1.7997075508471817e-05, "loss": 0.6199, "step": 7758 }, { "epoch": 1.498165669048079, "grad_norm": 1.0883135795593262, "learning_rate": 1.7984130705692986e-05, "loss": 0.5651, "step": 7759 }, { "epoch": 1.498358756516702, "grad_norm": 0.9876291751861572, "learning_rate": 1.797118953910643e-05, "loss": 0.5948, "step": 7760 }, { "epoch": 1.4985518439853254, "grad_norm": 1.0089534521102905, "learning_rate": 1.7958252010181982e-05, "loss": 0.593, "step": 7761 }, { "epoch": 1.4987449314539487, "grad_norm": 0.6372475028038025, "learning_rate": 1.794531812038901e-05, "loss": 0.6353, "step": 7762 }, { "epoch": 1.4989380189225718, "grad_norm": 0.8527602553367615, "learning_rate": 1.7932387871196428e-05, "loss": 0.6455, "step": 7763 }, { "epoch": 1.4991311063911952, "grad_norm": 1.3582313060760498, "learning_rate": 1.7919461264072835e-05, "loss": 0.6409, "step": 7764 }, { "epoch": 1.4993241938598185, "grad_norm": 0.8489587306976318, "learning_rate": 1.7906538300486347e-05, "loss": 0.6902, "step": 7765 }, { "epoch": 1.4995172813284419, "grad_norm": 0.8529195785522461, "learning_rate": 1.7893618981904675e-05, "loss": 0.6361, "step": 7766 }, { "epoch": 1.4997103687970652, "grad_norm": 0.7352434992790222, "learning_rate": 1.7880703309795173e-05, "loss": 0.6644, "step": 7767 }, { "epoch": 1.4999034562656883, "grad_norm": 0.8279403448104858, "learning_rate": 1.7867791285624664e-05, "loss": 0.5744, "step": 7768 }, { "epoch": 1.5000965437343117, "grad_norm": 1.0761224031448364, "learning_rate": 1.7854882910859683e-05, "loss": 0.5906, "step": 7769 }, { "epoch": 1.5002896312029348, "grad_norm": 2.524787425994873, "learning_rate": 1.7841978186966273e-05, "loss": 0.5879, "step": 7770 }, { "epoch": 1.5004827186715581, "grad_norm": 0.7321520447731018, "learning_rate": 1.782907711541009e-05, "loss": 0.6014, "step": 7771 }, { "epoch": 1.5006758061401815, "grad_norm": 1.0968972444534302, "learning_rate": 1.7816179697656356e-05, "loss": 0.6019, "step": 7772 }, { "epoch": 1.5008688936088048, "grad_norm": 2.061920642852783, "learning_rate": 1.7803285935169922e-05, "loss": 0.6138, "step": 7773 }, { "epoch": 1.5010619810774282, "grad_norm": 0.7888332605361938, "learning_rate": 1.7790395829415148e-05, "loss": 0.6832, "step": 7774 }, { "epoch": 1.5012550685460515, "grad_norm": 0.8099504709243774, "learning_rate": 1.777750938185606e-05, "loss": 0.5423, "step": 7775 }, { "epoch": 1.5014481560146746, "grad_norm": 0.9441084861755371, "learning_rate": 1.7764626593956223e-05, "loss": 0.5514, "step": 7776 }, { "epoch": 1.501641243483298, "grad_norm": 0.8776454329490662, "learning_rate": 1.7751747467178776e-05, "loss": 0.5913, "step": 7777 }, { "epoch": 1.501834330951921, "grad_norm": 2.3943212032318115, "learning_rate": 1.773887200298649e-05, "loss": 0.6179, "step": 7778 }, { "epoch": 1.5020274184205444, "grad_norm": 0.8399549722671509, "learning_rate": 1.7726000202841703e-05, "loss": 0.5553, "step": 7779 }, { "epoch": 1.5022205058891678, "grad_norm": 0.9617406129837036, "learning_rate": 1.7713132068206267e-05, "loss": 0.5681, "step": 7780 }, { "epoch": 1.5024135933577911, "grad_norm": 0.5158629417419434, "learning_rate": 1.7700267600541725e-05, "loss": 0.5587, "step": 7781 }, { "epoch": 1.5026066808264145, "grad_norm": 1.1149402856826782, "learning_rate": 1.768740680130915e-05, "loss": 0.5598, "step": 7782 }, { "epoch": 1.5027997682950378, "grad_norm": 1.2248581647872925, "learning_rate": 1.767454967196917e-05, "loss": 0.5954, "step": 7783 }, { "epoch": 1.502992855763661, "grad_norm": 0.8746914267539978, "learning_rate": 1.766169621398208e-05, "loss": 0.6054, "step": 7784 }, { "epoch": 1.5031859432322843, "grad_norm": 0.7363276481628418, "learning_rate": 1.764884642880768e-05, "loss": 0.5069, "step": 7785 }, { "epoch": 1.5033790307009074, "grad_norm": 0.7939462661743164, "learning_rate": 1.7636000317905382e-05, "loss": 0.5618, "step": 7786 }, { "epoch": 1.5035721181695307, "grad_norm": 1.2922006845474243, "learning_rate": 1.7623157882734176e-05, "loss": 0.6354, "step": 7787 }, { "epoch": 1.503765205638154, "grad_norm": 1.5506316423416138, "learning_rate": 1.7610319124752627e-05, "loss": 0.6725, "step": 7788 }, { "epoch": 1.5039582931067774, "grad_norm": 0.8260018229484558, "learning_rate": 1.7597484045418928e-05, "loss": 0.6447, "step": 7789 }, { "epoch": 1.5041513805754008, "grad_norm": 0.7787923812866211, "learning_rate": 1.7584652646190807e-05, "loss": 0.5815, "step": 7790 }, { "epoch": 1.504344468044024, "grad_norm": 0.8349242210388184, "learning_rate": 1.757182492852555e-05, "loss": 0.5907, "step": 7791 }, { "epoch": 1.5045375555126472, "grad_norm": 0.8523642420768738, "learning_rate": 1.7559000893880102e-05, "loss": 0.5954, "step": 7792 }, { "epoch": 1.5047306429812703, "grad_norm": 0.8776655197143555, "learning_rate": 1.7546180543710933e-05, "loss": 0.6467, "step": 7793 }, { "epoch": 1.5049237304498937, "grad_norm": 0.720615565776825, "learning_rate": 1.7533363879474102e-05, "loss": 0.5998, "step": 7794 }, { "epoch": 1.505116817918517, "grad_norm": 0.9755424857139587, "learning_rate": 1.7520550902625278e-05, "loss": 0.5972, "step": 7795 }, { "epoch": 1.5053099053871404, "grad_norm": 0.6965921521186829, "learning_rate": 1.7507741614619684e-05, "loss": 0.555, "step": 7796 }, { "epoch": 1.5055029928557637, "grad_norm": 1.33354651927948, "learning_rate": 1.7494936016912127e-05, "loss": 0.6111, "step": 7797 }, { "epoch": 1.505696080324387, "grad_norm": 1.5952597856521606, "learning_rate": 1.7482134110957e-05, "loss": 0.595, "step": 7798 }, { "epoch": 1.5058891677930102, "grad_norm": 2.6667306423187256, "learning_rate": 1.7469335898208257e-05, "loss": 0.572, "step": 7799 }, { "epoch": 1.5060822552616335, "grad_norm": 0.729238748550415, "learning_rate": 1.7456541380119483e-05, "loss": 0.6515, "step": 7800 }, { "epoch": 1.5062753427302567, "grad_norm": 0.9694631695747375, "learning_rate": 1.7443750558143803e-05, "loss": 0.6094, "step": 7801 }, { "epoch": 1.50646843019888, "grad_norm": 0.7371339201927185, "learning_rate": 1.7430963433733922e-05, "loss": 0.6479, "step": 7802 }, { "epoch": 1.5066615176675033, "grad_norm": 0.6904452443122864, "learning_rate": 1.7418180008342133e-05, "loss": 0.6116, "step": 7803 }, { "epoch": 1.5068546051361267, "grad_norm": 0.9476543068885803, "learning_rate": 1.7405400283420315e-05, "loss": 0.6977, "step": 7804 }, { "epoch": 1.50704769260475, "grad_norm": 0.8620442748069763, "learning_rate": 1.7392624260419903e-05, "loss": 0.6385, "step": 7805 }, { "epoch": 1.5072407800733734, "grad_norm": 0.8279932737350464, "learning_rate": 1.7379851940791965e-05, "loss": 0.6428, "step": 7806 }, { "epoch": 1.5074338675419965, "grad_norm": 0.7647597193717957, "learning_rate": 1.736708332598709e-05, "loss": 0.7272, "step": 7807 }, { "epoch": 1.5076269550106198, "grad_norm": 0.7474058270454407, "learning_rate": 1.7354318417455472e-05, "loss": 0.6093, "step": 7808 }, { "epoch": 1.507820042479243, "grad_norm": 1.0876630544662476, "learning_rate": 1.7341557216646893e-05, "loss": 0.5697, "step": 7809 }, { "epoch": 1.5080131299478663, "grad_norm": 0.8095265030860901, "learning_rate": 1.7328799725010665e-05, "loss": 0.6229, "step": 7810 }, { "epoch": 1.5082062174164896, "grad_norm": 1.6609842777252197, "learning_rate": 1.731604594399576e-05, "loss": 0.6799, "step": 7811 }, { "epoch": 1.508399304885113, "grad_norm": 1.307479977607727, "learning_rate": 1.7303295875050673e-05, "loss": 0.6043, "step": 7812 }, { "epoch": 1.5085923923537363, "grad_norm": 0.8185714483261108, "learning_rate": 1.7290549519623482e-05, "loss": 0.6039, "step": 7813 }, { "epoch": 1.5087854798223597, "grad_norm": 0.9451993703842163, "learning_rate": 1.7277806879161858e-05, "loss": 0.6306, "step": 7814 }, { "epoch": 1.5089785672909828, "grad_norm": 19.89622688293457, "learning_rate": 1.7265067955113028e-05, "loss": 0.5701, "step": 7815 }, { "epoch": 1.5091716547596061, "grad_norm": 0.7172645330429077, "learning_rate": 1.725233274892381e-05, "loss": 0.6506, "step": 7816 }, { "epoch": 1.5093647422282293, "grad_norm": 0.9942845106124878, "learning_rate": 1.723960126204063e-05, "loss": 0.5874, "step": 7817 }, { "epoch": 1.5095578296968526, "grad_norm": 1.2912161350250244, "learning_rate": 1.7226873495909434e-05, "loss": 0.5804, "step": 7818 }, { "epoch": 1.509750917165476, "grad_norm": 2.9624438285827637, "learning_rate": 1.721414945197579e-05, "loss": 0.6096, "step": 7819 }, { "epoch": 1.5099440046340993, "grad_norm": 0.8070615530014038, "learning_rate": 1.720142913168482e-05, "loss": 0.6274, "step": 7820 }, { "epoch": 1.5101370921027226, "grad_norm": 1.691908359527588, "learning_rate": 1.7188712536481232e-05, "loss": 0.6291, "step": 7821 }, { "epoch": 1.510330179571346, "grad_norm": 0.9958112835884094, "learning_rate": 1.7175999667809294e-05, "loss": 0.6197, "step": 7822 }, { "epoch": 1.510523267039969, "grad_norm": 21.616395950317383, "learning_rate": 1.7163290527112897e-05, "loss": 0.5859, "step": 7823 }, { "epoch": 1.5107163545085924, "grad_norm": 0.6483842134475708, "learning_rate": 1.7150585115835456e-05, "loss": 0.6327, "step": 7824 }, { "epoch": 1.5109094419772156, "grad_norm": 0.8859455585479736, "learning_rate": 1.7137883435419995e-05, "loss": 0.6445, "step": 7825 }, { "epoch": 1.511102529445839, "grad_norm": 0.9023823142051697, "learning_rate": 1.71251854873091e-05, "loss": 0.6147, "step": 7826 }, { "epoch": 1.5112956169144622, "grad_norm": 0.9264420866966248, "learning_rate": 1.7112491272944913e-05, "loss": 0.6415, "step": 7827 }, { "epoch": 1.5114887043830856, "grad_norm": 1.1283541917800903, "learning_rate": 1.7099800793769217e-05, "loss": 0.5925, "step": 7828 }, { "epoch": 1.511681791851709, "grad_norm": 1.873639702796936, "learning_rate": 1.70871140512233e-05, "loss": 0.5742, "step": 7829 }, { "epoch": 1.5118748793203323, "grad_norm": 0.9088301062583923, "learning_rate": 1.7074431046748075e-05, "loss": 0.599, "step": 7830 }, { "epoch": 1.5120679667889554, "grad_norm": 0.7701099514961243, "learning_rate": 1.7061751781783996e-05, "loss": 0.6155, "step": 7831 }, { "epoch": 1.5122610542575787, "grad_norm": 0.7998192310333252, "learning_rate": 1.7049076257771106e-05, "loss": 0.6897, "step": 7832 }, { "epoch": 1.5124541417262019, "grad_norm": 1.0949835777282715, "learning_rate": 1.7036404476149017e-05, "loss": 0.6399, "step": 7833 }, { "epoch": 1.5126472291948252, "grad_norm": 0.8193252086639404, "learning_rate": 1.7023736438356948e-05, "loss": 0.5767, "step": 7834 }, { "epoch": 1.5128403166634485, "grad_norm": 0.8261276483535767, "learning_rate": 1.7011072145833646e-05, "loss": 0.6233, "step": 7835 }, { "epoch": 1.5130334041320719, "grad_norm": 0.9054979681968689, "learning_rate": 1.6998411600017465e-05, "loss": 0.5577, "step": 7836 }, { "epoch": 1.5132264916006952, "grad_norm": 0.8573469519615173, "learning_rate": 1.698575480234632e-05, "loss": 0.5951, "step": 7837 }, { "epoch": 1.5134195790693186, "grad_norm": 1.2244285345077515, "learning_rate": 1.697310175425768e-05, "loss": 0.636, "step": 7838 }, { "epoch": 1.5136126665379417, "grad_norm": 0.9646856784820557, "learning_rate": 1.6960452457188652e-05, "loss": 0.6202, "step": 7839 }, { "epoch": 1.5138057540065648, "grad_norm": 2.2703464031219482, "learning_rate": 1.6947806912575852e-05, "loss": 0.5777, "step": 7840 }, { "epoch": 1.5139988414751882, "grad_norm": 0.9796308875083923, "learning_rate": 1.6935165121855486e-05, "loss": 0.6128, "step": 7841 }, { "epoch": 1.5141919289438115, "grad_norm": 1.1179531812667847, "learning_rate": 1.692252708646338e-05, "loss": 0.5812, "step": 7842 }, { "epoch": 1.5143850164124348, "grad_norm": 2.1237597465515137, "learning_rate": 1.6909892807834855e-05, "loss": 0.5873, "step": 7843 }, { "epoch": 1.5145781038810582, "grad_norm": 1.323593258857727, "learning_rate": 1.6897262287404843e-05, "loss": 0.667, "step": 7844 }, { "epoch": 1.5147711913496815, "grad_norm": 1.5515291690826416, "learning_rate": 1.6884635526607878e-05, "loss": 0.5701, "step": 7845 }, { "epoch": 1.5149642788183046, "grad_norm": 1.1190234422683716, "learning_rate": 1.6872012526878027e-05, "loss": 0.5563, "step": 7846 }, { "epoch": 1.515157366286928, "grad_norm": 0.7613918781280518, "learning_rate": 1.6859393289648934e-05, "loss": 0.5656, "step": 7847 }, { "epoch": 1.515350453755551, "grad_norm": 0.8034970164299011, "learning_rate": 1.684677781635387e-05, "loss": 0.6004, "step": 7848 }, { "epoch": 1.5155435412241745, "grad_norm": 0.8992485404014587, "learning_rate": 1.683416610842556e-05, "loss": 0.6749, "step": 7849 }, { "epoch": 1.5157366286927978, "grad_norm": 0.8373244404792786, "learning_rate": 1.6821558167296437e-05, "loss": 0.6468, "step": 7850 }, { "epoch": 1.5159297161614211, "grad_norm": 0.8038786053657532, "learning_rate": 1.6808953994398414e-05, "loss": 0.6113, "step": 7851 }, { "epoch": 1.5161228036300445, "grad_norm": 1.0822149515151978, "learning_rate": 1.6796353591163e-05, "loss": 0.5888, "step": 7852 }, { "epoch": 1.5163158910986678, "grad_norm": 0.9844715595245361, "learning_rate": 1.6783756959021328e-05, "loss": 0.504, "step": 7853 }, { "epoch": 1.516508978567291, "grad_norm": 1.1206517219543457, "learning_rate": 1.6771164099404007e-05, "loss": 0.6606, "step": 7854 }, { "epoch": 1.5167020660359143, "grad_norm": 0.9998642802238464, "learning_rate": 1.6758575013741267e-05, "loss": 0.6175, "step": 7855 }, { "epoch": 1.5168951535045374, "grad_norm": 1.3125801086425781, "learning_rate": 1.6745989703462945e-05, "loss": 0.6882, "step": 7856 }, { "epoch": 1.5170882409731608, "grad_norm": 0.9168505668640137, "learning_rate": 1.6733408169998388e-05, "loss": 0.5996, "step": 7857 }, { "epoch": 1.517281328441784, "grad_norm": 0.9741201400756836, "learning_rate": 1.6720830414776533e-05, "loss": 0.6298, "step": 7858 }, { "epoch": 1.5174744159104074, "grad_norm": 0.9798782467842102, "learning_rate": 1.6708256439225943e-05, "loss": 0.5943, "step": 7859 }, { "epoch": 1.5176675033790308, "grad_norm": 0.8065174221992493, "learning_rate": 1.6695686244774634e-05, "loss": 0.6307, "step": 7860 }, { "epoch": 1.5178605908476541, "grad_norm": 1.2183576822280884, "learning_rate": 1.6683119832850307e-05, "loss": 0.5671, "step": 7861 }, { "epoch": 1.5180536783162772, "grad_norm": 1.1715621948242188, "learning_rate": 1.6670557204880182e-05, "loss": 0.665, "step": 7862 }, { "epoch": 1.5182467657849006, "grad_norm": 0.8286766409873962, "learning_rate": 1.6657998362291055e-05, "loss": 0.6073, "step": 7863 }, { "epoch": 1.5184398532535237, "grad_norm": 3.7705466747283936, "learning_rate": 1.6645443306509266e-05, "loss": 0.6029, "step": 7864 }, { "epoch": 1.518632940722147, "grad_norm": 0.8195961713790894, "learning_rate": 1.6632892038960808e-05, "loss": 0.6293, "step": 7865 }, { "epoch": 1.5188260281907704, "grad_norm": 2.082317352294922, "learning_rate": 1.6620344561071122e-05, "loss": 0.6167, "step": 7866 }, { "epoch": 1.5190191156593937, "grad_norm": 1.383804202079773, "learning_rate": 1.6607800874265328e-05, "loss": 0.6026, "step": 7867 }, { "epoch": 1.519212203128017, "grad_norm": 1.4541617631912231, "learning_rate": 1.6595260979968064e-05, "loss": 0.6142, "step": 7868 }, { "epoch": 1.5194052905966404, "grad_norm": 0.763737142086029, "learning_rate": 1.6582724879603518e-05, "loss": 0.6083, "step": 7869 }, { "epoch": 1.5195983780652635, "grad_norm": 0.9203377962112427, "learning_rate": 1.657019257459551e-05, "loss": 0.6077, "step": 7870 }, { "epoch": 1.5197914655338869, "grad_norm": 0.7017422914505005, "learning_rate": 1.65576640663674e-05, "loss": 0.5724, "step": 7871 }, { "epoch": 1.51998455300251, "grad_norm": 0.7167297005653381, "learning_rate": 1.654513935634205e-05, "loss": 0.5509, "step": 7872 }, { "epoch": 1.5201776404711334, "grad_norm": 0.7420059442520142, "learning_rate": 1.6532618445942e-05, "loss": 0.6565, "step": 7873 }, { "epoch": 1.5203707279397567, "grad_norm": 0.8307139277458191, "learning_rate": 1.65201013365893e-05, "loss": 0.6169, "step": 7874 }, { "epoch": 1.52056381540838, "grad_norm": 1.430720329284668, "learning_rate": 1.6507588029705552e-05, "loss": 0.585, "step": 7875 }, { "epoch": 1.5207569028770034, "grad_norm": 1.2565422058105469, "learning_rate": 1.6495078526712006e-05, "loss": 0.6003, "step": 7876 }, { "epoch": 1.5209499903456267, "grad_norm": 2.3948562145233154, "learning_rate": 1.6482572829029354e-05, "loss": 0.6184, "step": 7877 }, { "epoch": 1.5211430778142498, "grad_norm": 2.1168715953826904, "learning_rate": 1.647007093807798e-05, "loss": 0.6166, "step": 7878 }, { "epoch": 1.5213361652828732, "grad_norm": 0.8917309045791626, "learning_rate": 1.6457572855277763e-05, "loss": 0.6319, "step": 7879 }, { "epoch": 1.5215292527514963, "grad_norm": 1.3224437236785889, "learning_rate": 1.6445078582048155e-05, "loss": 0.608, "step": 7880 }, { "epoch": 1.5217223402201197, "grad_norm": 0.9711452126502991, "learning_rate": 1.6432588119808223e-05, "loss": 0.6437, "step": 7881 }, { "epoch": 1.521915427688743, "grad_norm": 0.9440467357635498, "learning_rate": 1.642010146997656e-05, "loss": 0.6305, "step": 7882 }, { "epoch": 1.5221085151573663, "grad_norm": 1.1970512866973877, "learning_rate": 1.6407618633971294e-05, "loss": 0.5593, "step": 7883 }, { "epoch": 1.5223016026259897, "grad_norm": 0.9579845070838928, "learning_rate": 1.6395139613210202e-05, "loss": 0.5975, "step": 7884 }, { "epoch": 1.522494690094613, "grad_norm": 1.1222436428070068, "learning_rate": 1.6382664409110577e-05, "loss": 0.6607, "step": 7885 }, { "epoch": 1.5226877775632361, "grad_norm": 1.7761229276657104, "learning_rate": 1.6370193023089253e-05, "loss": 0.5591, "step": 7886 }, { "epoch": 1.5228808650318595, "grad_norm": 1.0236549377441406, "learning_rate": 1.635772545656271e-05, "loss": 0.5903, "step": 7887 }, { "epoch": 1.5230739525004826, "grad_norm": 1.3373662233352661, "learning_rate": 1.6345261710946934e-05, "loss": 0.6671, "step": 7888 }, { "epoch": 1.523267039969106, "grad_norm": 1.2101333141326904, "learning_rate": 1.6332801787657483e-05, "loss": 0.5982, "step": 7889 }, { "epoch": 1.5234601274377293, "grad_norm": 0.8277810215950012, "learning_rate": 1.6320345688109485e-05, "loss": 0.5779, "step": 7890 }, { "epoch": 1.5236532149063526, "grad_norm": 1.371394395828247, "learning_rate": 1.6307893413717635e-05, "loss": 0.6289, "step": 7891 }, { "epoch": 1.523846302374976, "grad_norm": 0.7546523809432983, "learning_rate": 1.6295444965896217e-05, "loss": 0.557, "step": 7892 }, { "epoch": 1.524039389843599, "grad_norm": 0.7933998703956604, "learning_rate": 1.6283000346059064e-05, "loss": 0.6312, "step": 7893 }, { "epoch": 1.5242324773122224, "grad_norm": 0.8059420585632324, "learning_rate": 1.627055955561952e-05, "loss": 0.622, "step": 7894 }, { "epoch": 1.5244255647808456, "grad_norm": 0.7981029748916626, "learning_rate": 1.62581225959906e-05, "loss": 0.6523, "step": 7895 }, { "epoch": 1.524618652249469, "grad_norm": 0.7723202109336853, "learning_rate": 1.6245689468584795e-05, "loss": 0.6271, "step": 7896 }, { "epoch": 1.5248117397180923, "grad_norm": 0.7419690489768982, "learning_rate": 1.623326017481419e-05, "loss": 0.5929, "step": 7897 }, { "epoch": 1.5250048271867156, "grad_norm": 0.851036548614502, "learning_rate": 1.622083471609047e-05, "loss": 0.6023, "step": 7898 }, { "epoch": 1.525197914655339, "grad_norm": 0.8919738531112671, "learning_rate": 1.6208413093824838e-05, "loss": 0.6472, "step": 7899 }, { "epoch": 1.5253910021239623, "grad_norm": 1.0410492420196533, "learning_rate": 1.619599530942807e-05, "loss": 0.6202, "step": 7900 }, { "epoch": 1.5255840895925854, "grad_norm": 1.0995137691497803, "learning_rate": 1.6183581364310514e-05, "loss": 0.646, "step": 7901 }, { "epoch": 1.5257771770612087, "grad_norm": 0.9628298282623291, "learning_rate": 1.6171171259882074e-05, "loss": 0.6455, "step": 7902 }, { "epoch": 1.5259702645298319, "grad_norm": 0.8640128970146179, "learning_rate": 1.6158764997552217e-05, "loss": 0.5507, "step": 7903 }, { "epoch": 1.5261633519984552, "grad_norm": 0.641409158706665, "learning_rate": 1.614636257873001e-05, "loss": 0.7106, "step": 7904 }, { "epoch": 1.5263564394670786, "grad_norm": 0.9110792279243469, "learning_rate": 1.6133964004824035e-05, "loss": 0.5645, "step": 7905 }, { "epoch": 1.526549526935702, "grad_norm": 0.9583005905151367, "learning_rate": 1.612156927724246e-05, "loss": 0.5517, "step": 7906 }, { "epoch": 1.5267426144043252, "grad_norm": 0.9196207523345947, "learning_rate": 1.610917839739301e-05, "loss": 0.628, "step": 7907 }, { "epoch": 1.5269357018729486, "grad_norm": 0.9969815611839294, "learning_rate": 1.6096791366682962e-05, "loss": 0.6181, "step": 7908 }, { "epoch": 1.5271287893415717, "grad_norm": 1.4034221172332764, "learning_rate": 1.6084408186519196e-05, "loss": 0.6203, "step": 7909 }, { "epoch": 1.527321876810195, "grad_norm": 0.7025895714759827, "learning_rate": 1.6072028858308112e-05, "loss": 0.5673, "step": 7910 }, { "epoch": 1.5275149642788182, "grad_norm": 0.6254238486289978, "learning_rate": 1.6059653383455698e-05, "loss": 0.6571, "step": 7911 }, { "epoch": 1.5277080517474415, "grad_norm": 0.8430579900741577, "learning_rate": 1.6047281763367477e-05, "loss": 0.6253, "step": 7912 }, { "epoch": 1.5279011392160649, "grad_norm": 1.251882553100586, "learning_rate": 1.603491399944857e-05, "loss": 0.5888, "step": 7913 }, { "epoch": 1.5280942266846882, "grad_norm": 0.8172855377197266, "learning_rate": 1.6022550093103617e-05, "loss": 0.5629, "step": 7914 }, { "epoch": 1.5282873141533115, "grad_norm": 1.037023901939392, "learning_rate": 1.601019004573688e-05, "loss": 0.5671, "step": 7915 }, { "epoch": 1.5284804016219349, "grad_norm": 1.444292664527893, "learning_rate": 1.599783385875212e-05, "loss": 0.5759, "step": 7916 }, { "epoch": 1.528673489090558, "grad_norm": 1.0348349809646606, "learning_rate": 1.5985481533552704e-05, "loss": 0.5315, "step": 7917 }, { "epoch": 1.5288665765591813, "grad_norm": 0.9874212145805359, "learning_rate": 1.5973133071541536e-05, "loss": 0.5661, "step": 7918 }, { "epoch": 1.5290596640278045, "grad_norm": 0.8714022040367126, "learning_rate": 1.596078847412107e-05, "loss": 0.5638, "step": 7919 }, { "epoch": 1.5292527514964278, "grad_norm": 1.5666190385818481, "learning_rate": 1.594844774269338e-05, "loss": 0.5758, "step": 7920 }, { "epoch": 1.5294458389650512, "grad_norm": 2.040858268737793, "learning_rate": 1.593611087866003e-05, "loss": 0.5948, "step": 7921 }, { "epoch": 1.5296389264336745, "grad_norm": 1.6452947854995728, "learning_rate": 1.5923777883422193e-05, "loss": 0.591, "step": 7922 }, { "epoch": 1.5298320139022978, "grad_norm": 2.8847503662109375, "learning_rate": 1.591144875838057e-05, "loss": 0.6054, "step": 7923 }, { "epoch": 1.5300251013709212, "grad_norm": 0.8807005882263184, "learning_rate": 1.5899123504935447e-05, "loss": 0.6688, "step": 7924 }, { "epoch": 1.5302181888395443, "grad_norm": 1.9138251543045044, "learning_rate": 1.5886802124486644e-05, "loss": 0.68, "step": 7925 }, { "epoch": 1.5304112763081676, "grad_norm": 0.8022644519805908, "learning_rate": 1.587448461843359e-05, "loss": 0.577, "step": 7926 }, { "epoch": 1.5306043637767908, "grad_norm": 1.3172764778137207, "learning_rate": 1.586217098817522e-05, "loss": 0.6254, "step": 7927 }, { "epoch": 1.530797451245414, "grad_norm": 0.9296907186508179, "learning_rate": 1.5849861235110053e-05, "loss": 0.6321, "step": 7928 }, { "epoch": 1.5309905387140375, "grad_norm": 0.8762623071670532, "learning_rate": 1.5837555360636165e-05, "loss": 0.6189, "step": 7929 }, { "epoch": 1.5311836261826608, "grad_norm": 1.1727445125579834, "learning_rate": 1.582525336615118e-05, "loss": 0.5753, "step": 7930 }, { "epoch": 1.5313767136512841, "grad_norm": 1.1033403873443604, "learning_rate": 1.5812955253052326e-05, "loss": 0.5607, "step": 7931 }, { "epoch": 1.5315698011199075, "grad_norm": 1.151843786239624, "learning_rate": 1.5800661022736336e-05, "loss": 0.6085, "step": 7932 }, { "epoch": 1.5317628885885306, "grad_norm": 0.7102153301239014, "learning_rate": 1.5788370676599507e-05, "loss": 0.6535, "step": 7933 }, { "epoch": 1.531955976057154, "grad_norm": 0.7998164892196655, "learning_rate": 1.577608421603776e-05, "loss": 0.6023, "step": 7934 }, { "epoch": 1.532149063525777, "grad_norm": 0.9794909954071045, "learning_rate": 1.5763801642446487e-05, "loss": 0.6402, "step": 7935 }, { "epoch": 1.5323421509944004, "grad_norm": 1.2736314535140991, "learning_rate": 1.5751522957220665e-05, "loss": 0.625, "step": 7936 }, { "epoch": 1.5325352384630238, "grad_norm": 0.813583493232727, "learning_rate": 1.5739248161754878e-05, "loss": 0.5538, "step": 7937 }, { "epoch": 1.532728325931647, "grad_norm": 2.542945146560669, "learning_rate": 1.572697725744322e-05, "loss": 0.6651, "step": 7938 }, { "epoch": 1.5329214134002704, "grad_norm": 0.8254461884498596, "learning_rate": 1.571471024567935e-05, "loss": 0.6133, "step": 7939 }, { "epoch": 1.5331145008688936, "grad_norm": 0.6568042635917664, "learning_rate": 1.570244712785649e-05, "loss": 0.6212, "step": 7940 }, { "epoch": 1.533307588337517, "grad_norm": 0.8422073721885681, "learning_rate": 1.569018790536741e-05, "loss": 0.6015, "step": 7941 }, { "epoch": 1.53350067580614, "grad_norm": 0.8909154534339905, "learning_rate": 1.567793257960447e-05, "loss": 0.5845, "step": 7942 }, { "epoch": 1.5336937632747634, "grad_norm": 0.8563734889030457, "learning_rate": 1.566568115195956e-05, "loss": 0.5496, "step": 7943 }, { "epoch": 1.5338868507433867, "grad_norm": 0.7372366786003113, "learning_rate": 1.565343362382412e-05, "loss": 0.5992, "step": 7944 }, { "epoch": 1.53407993821201, "grad_norm": 1.1393184661865234, "learning_rate": 1.5641189996589166e-05, "loss": 0.6614, "step": 7945 }, { "epoch": 1.5342730256806334, "grad_norm": 5.771524906158447, "learning_rate": 1.5628950271645264e-05, "loss": 0.5752, "step": 7946 }, { "epoch": 1.5344661131492567, "grad_norm": 0.9096037745475769, "learning_rate": 1.5616714450382523e-05, "loss": 0.5979, "step": 7947 }, { "epoch": 1.5346592006178799, "grad_norm": 1.006868839263916, "learning_rate": 1.5604482534190653e-05, "loss": 0.5872, "step": 7948 }, { "epoch": 1.5348522880865032, "grad_norm": 0.7568910121917725, "learning_rate": 1.5592254524458864e-05, "loss": 0.5728, "step": 7949 }, { "epoch": 1.5350453755551263, "grad_norm": 4.466136455535889, "learning_rate": 1.5580030422575947e-05, "loss": 0.6573, "step": 7950 }, { "epoch": 1.5352384630237497, "grad_norm": 0.8977736234664917, "learning_rate": 1.556781022993029e-05, "loss": 0.6118, "step": 7951 }, { "epoch": 1.535431550492373, "grad_norm": 1.0190616846084595, "learning_rate": 1.555559394790975e-05, "loss": 0.6333, "step": 7952 }, { "epoch": 1.5356246379609964, "grad_norm": 1.2482163906097412, "learning_rate": 1.5543381577901793e-05, "loss": 0.6227, "step": 7953 }, { "epoch": 1.5358177254296197, "grad_norm": 1.8250885009765625, "learning_rate": 1.553117312129347e-05, "loss": 0.6841, "step": 7954 }, { "epoch": 1.536010812898243, "grad_norm": 5.280364513397217, "learning_rate": 1.551896857947132e-05, "loss": 0.6365, "step": 7955 }, { "epoch": 1.5362039003668662, "grad_norm": 1.2240110635757446, "learning_rate": 1.5506767953821483e-05, "loss": 0.6049, "step": 7956 }, { "epoch": 1.5363969878354895, "grad_norm": 0.9007218480110168, "learning_rate": 1.5494571245729638e-05, "loss": 0.6212, "step": 7957 }, { "epoch": 1.5365900753041126, "grad_norm": 1.238795280456543, "learning_rate": 1.548237845658101e-05, "loss": 0.6132, "step": 7958 }, { "epoch": 1.536783162772736, "grad_norm": 2.111774444580078, "learning_rate": 1.5470189587760414e-05, "loss": 0.5211, "step": 7959 }, { "epoch": 1.5369762502413593, "grad_norm": 1.7093108892440796, "learning_rate": 1.545800464065219e-05, "loss": 0.6097, "step": 7960 }, { "epoch": 1.5371693377099827, "grad_norm": 0.792203962802887, "learning_rate": 1.544582361664022e-05, "loss": 0.6846, "step": 7961 }, { "epoch": 1.537362425178606, "grad_norm": 1.308822512626648, "learning_rate": 1.5433646517108007e-05, "loss": 0.7064, "step": 7962 }, { "epoch": 1.5375555126472293, "grad_norm": 1.0257844924926758, "learning_rate": 1.542147334343852e-05, "loss": 0.6175, "step": 7963 }, { "epoch": 1.5377486001158525, "grad_norm": 1.4665683507919312, "learning_rate": 1.5409304097014315e-05, "loss": 0.5521, "step": 7964 }, { "epoch": 1.5379416875844758, "grad_norm": 1.6571404933929443, "learning_rate": 1.5397138779217547e-05, "loss": 0.6185, "step": 7965 }, { "epoch": 1.538134775053099, "grad_norm": 1.1683269739151, "learning_rate": 1.5384977391429866e-05, "loss": 0.6305, "step": 7966 }, { "epoch": 1.5383278625217223, "grad_norm": 1.1654683351516724, "learning_rate": 1.537281993503249e-05, "loss": 0.6293, "step": 7967 }, { "epoch": 1.5385209499903456, "grad_norm": 0.9570612907409668, "learning_rate": 1.5360666411406237e-05, "loss": 0.6435, "step": 7968 }, { "epoch": 1.538714037458969, "grad_norm": 1.195398211479187, "learning_rate": 1.5348516821931384e-05, "loss": 0.6433, "step": 7969 }, { "epoch": 1.5389071249275923, "grad_norm": 1.030814528465271, "learning_rate": 1.5336371167987862e-05, "loss": 0.5762, "step": 7970 }, { "epoch": 1.5391002123962156, "grad_norm": 0.9565578103065491, "learning_rate": 1.5324229450955097e-05, "loss": 0.6302, "step": 7971 }, { "epoch": 1.5392932998648388, "grad_norm": 0.5976481437683105, "learning_rate": 1.531209167221206e-05, "loss": 0.5901, "step": 7972 }, { "epoch": 1.539486387333462, "grad_norm": 1.007993459701538, "learning_rate": 1.529995783313733e-05, "loss": 0.6107, "step": 7973 }, { "epoch": 1.5396794748020852, "grad_norm": 1.2099944353103638, "learning_rate": 1.5287827935109002e-05, "loss": 0.631, "step": 7974 }, { "epoch": 1.5398725622707086, "grad_norm": 0.7991040945053101, "learning_rate": 1.5275701979504675e-05, "loss": 0.5931, "step": 7975 }, { "epoch": 1.540065649739332, "grad_norm": 0.7296366691589355, "learning_rate": 1.5263579967701603e-05, "loss": 0.635, "step": 7976 }, { "epoch": 1.5402587372079553, "grad_norm": 0.8678634166717529, "learning_rate": 1.5251461901076525e-05, "loss": 0.5457, "step": 7977 }, { "epoch": 1.5404518246765786, "grad_norm": 0.7335683703422546, "learning_rate": 1.5239347781005725e-05, "loss": 0.6623, "step": 7978 }, { "epoch": 1.540644912145202, "grad_norm": 1.1480152606964111, "learning_rate": 1.5227237608865114e-05, "loss": 0.6287, "step": 7979 }, { "epoch": 1.540837999613825, "grad_norm": 1.2986528873443604, "learning_rate": 1.5215131386030029e-05, "loss": 0.6056, "step": 7980 }, { "epoch": 1.5410310870824484, "grad_norm": 1.5114586353302002, "learning_rate": 1.5203029113875488e-05, "loss": 0.6388, "step": 7981 }, { "epoch": 1.5412241745510715, "grad_norm": 1.4579030275344849, "learning_rate": 1.5190930793775987e-05, "loss": 0.7027, "step": 7982 }, { "epoch": 1.5414172620196949, "grad_norm": 2.147144079208374, "learning_rate": 1.517883642710558e-05, "loss": 0.6169, "step": 7983 }, { "epoch": 1.5416103494883182, "grad_norm": 1.025173544883728, "learning_rate": 1.5166746015237871e-05, "loss": 0.6105, "step": 7984 }, { "epoch": 1.5418034369569416, "grad_norm": 0.6202569603919983, "learning_rate": 1.5154659559546069e-05, "loss": 0.5851, "step": 7985 }, { "epoch": 1.541996524425565, "grad_norm": 1.0825912952423096, "learning_rate": 1.514257706140283e-05, "loss": 0.5959, "step": 7986 }, { "epoch": 1.542189611894188, "grad_norm": 1.134511947631836, "learning_rate": 1.5130498522180463e-05, "loss": 0.5799, "step": 7987 }, { "epoch": 1.5423826993628114, "grad_norm": 0.9974941611289978, "learning_rate": 1.5118423943250771e-05, "loss": 0.6179, "step": 7988 }, { "epoch": 1.5425757868314345, "grad_norm": 0.7206392884254456, "learning_rate": 1.5106353325985107e-05, "loss": 0.5885, "step": 7989 }, { "epoch": 1.5427688743000578, "grad_norm": 1.7262086868286133, "learning_rate": 1.5094286671754416e-05, "loss": 0.6219, "step": 7990 }, { "epoch": 1.5429619617686812, "grad_norm": 0.7120406627655029, "learning_rate": 1.5082223981929167e-05, "loss": 0.6086, "step": 7991 }, { "epoch": 1.5431550492373045, "grad_norm": 0.9411210417747498, "learning_rate": 1.507016525787932e-05, "loss": 0.6107, "step": 7992 }, { "epoch": 1.5433481367059279, "grad_norm": 9.089773178100586, "learning_rate": 1.5058110500974504e-05, "loss": 0.616, "step": 7993 }, { "epoch": 1.5435412241745512, "grad_norm": 0.7832352519035339, "learning_rate": 1.5046059712583805e-05, "loss": 0.6489, "step": 7994 }, { "epoch": 1.5437343116431743, "grad_norm": 3.432382106781006, "learning_rate": 1.5034012894075878e-05, "loss": 0.5966, "step": 7995 }, { "epoch": 1.5439273991117977, "grad_norm": 0.746052622795105, "learning_rate": 1.5021970046818961e-05, "loss": 0.6618, "step": 7996 }, { "epoch": 1.5441204865804208, "grad_norm": 1.6323322057724, "learning_rate": 1.5009931172180814e-05, "loss": 0.6492, "step": 7997 }, { "epoch": 1.5443135740490441, "grad_norm": 0.7876116037368774, "learning_rate": 1.4997896271528739e-05, "loss": 0.5995, "step": 7998 }, { "epoch": 1.5445066615176675, "grad_norm": 0.8643178343772888, "learning_rate": 1.4985865346229594e-05, "loss": 0.6284, "step": 7999 }, { "epoch": 1.5446997489862908, "grad_norm": 0.6612162590026855, "learning_rate": 1.4973838397649776e-05, "loss": 0.6057, "step": 8000 }, { "epoch": 1.5446997489862908, "eval_loss": 0.6558152437210083, "eval_runtime": 49.4282, "eval_samples_per_second": 13.434, "eval_steps_per_second": 0.425, "step": 8000 } ], "logging_steps": 1, "max_steps": 10358, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.643347657122885e+20, "train_batch_size": 4, "trial_name": null, "trial_params": null }