|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.4635450616072954, |
|
"eval_steps": 500, |
|
"global_step": 18312, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0007847256940708912, |
|
"grad_norm": 26.94572639465332, |
|
"learning_rate": 1.0157273918741808e-06, |
|
"loss": 8.5879, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.0015694513881417823, |
|
"grad_norm": 14.633014678955078, |
|
"learning_rate": 2.0314547837483616e-06, |
|
"loss": 7.5048, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.002354177082212673, |
|
"grad_norm": 15.984803199768066, |
|
"learning_rate": 3.0471821756225426e-06, |
|
"loss": 6.1391, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.0031389027762835646, |
|
"grad_norm": 11.297175407409668, |
|
"learning_rate": 4.062909567496723e-06, |
|
"loss": 4.9299, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.003923628470354455, |
|
"grad_norm": 14.864474296569824, |
|
"learning_rate": 5.078636959370905e-06, |
|
"loss": 4.3205, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.004708354164425346, |
|
"grad_norm": 11.237608909606934, |
|
"learning_rate": 6.094364351245085e-06, |
|
"loss": 4.0, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.005493079858496238, |
|
"grad_norm": 23.79303550720215, |
|
"learning_rate": 7.110091743119267e-06, |
|
"loss": 3.7952, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.006277805552567129, |
|
"grad_norm": 15.1551513671875, |
|
"learning_rate": 8.125819134993446e-06, |
|
"loss": 3.689, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.00706253124663802, |
|
"grad_norm": 14.605571746826172, |
|
"learning_rate": 9.141546526867629e-06, |
|
"loss": 3.5147, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.00784725694070891, |
|
"grad_norm": 16.463390350341797, |
|
"learning_rate": 1.015727391874181e-05, |
|
"loss": 3.3901, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.008631982634779801, |
|
"grad_norm": 13.09945011138916, |
|
"learning_rate": 1.117300131061599e-05, |
|
"loss": 3.317, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.009416708328850693, |
|
"grad_norm": 11.993067741394043, |
|
"learning_rate": 1.218872870249017e-05, |
|
"loss": 3.2508, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.010201434022921584, |
|
"grad_norm": 10.388030052185059, |
|
"learning_rate": 1.3204456094364351e-05, |
|
"loss": 3.1239, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.010986159716992476, |
|
"grad_norm": 11.977804183959961, |
|
"learning_rate": 1.4220183486238533e-05, |
|
"loss": 3.0739, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.011770885411063367, |
|
"grad_norm": 8.925983428955078, |
|
"learning_rate": 1.5235910878112714e-05, |
|
"loss": 3.0169, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.012555611105134258, |
|
"grad_norm": 9.57411003112793, |
|
"learning_rate": 1.6251638269986893e-05, |
|
"loss": 2.959, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.01334033679920515, |
|
"grad_norm": 7.380288124084473, |
|
"learning_rate": 1.7267365661861077e-05, |
|
"loss": 2.8921, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.01412506249327604, |
|
"grad_norm": 8.812368392944336, |
|
"learning_rate": 1.8283093053735257e-05, |
|
"loss": 2.843, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.014909788187346932, |
|
"grad_norm": 8.870095252990723, |
|
"learning_rate": 1.9298820445609438e-05, |
|
"loss": 2.7895, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.01569451388141782, |
|
"grad_norm": 9.503872871398926, |
|
"learning_rate": 2.031454783748362e-05, |
|
"loss": 2.7757, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.016479239575488712, |
|
"grad_norm": 6.582827568054199, |
|
"learning_rate": 2.13302752293578e-05, |
|
"loss": 2.7099, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.017263965269559603, |
|
"grad_norm": 6.266632556915283, |
|
"learning_rate": 2.234600262123198e-05, |
|
"loss": 2.6729, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.018048690963630494, |
|
"grad_norm": 6.645415306091309, |
|
"learning_rate": 2.336173001310616e-05, |
|
"loss": 2.6616, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.018833416657701385, |
|
"grad_norm": 7.8323073387146, |
|
"learning_rate": 2.437745740498034e-05, |
|
"loss": 2.6291, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.019618142351772276, |
|
"grad_norm": 5.577521324157715, |
|
"learning_rate": 2.5393184796854525e-05, |
|
"loss": 2.6072, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.020402868045843167, |
|
"grad_norm": 5.603636264801025, |
|
"learning_rate": 2.6408912188728702e-05, |
|
"loss": 2.5787, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.021187593739914058, |
|
"grad_norm": 6.945438385009766, |
|
"learning_rate": 2.7424639580602886e-05, |
|
"loss": 2.5198, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.021972319433984953, |
|
"grad_norm": 5.6279826164245605, |
|
"learning_rate": 2.8440366972477066e-05, |
|
"loss": 2.5417, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.022757045128055844, |
|
"grad_norm": 5.517001628875732, |
|
"learning_rate": 2.9456094364351244e-05, |
|
"loss": 2.4849, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.023541770822126735, |
|
"grad_norm": 5.865486145019531, |
|
"learning_rate": 3.0471821756225428e-05, |
|
"loss": 2.5103, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.024326496516197626, |
|
"grad_norm": 4.949043273925781, |
|
"learning_rate": 3.148754914809961e-05, |
|
"loss": 2.4581, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.025111222210268517, |
|
"grad_norm": 4.701717853546143, |
|
"learning_rate": 3.2503276539973785e-05, |
|
"loss": 2.4315, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.025895947904339408, |
|
"grad_norm": 4.533145904541016, |
|
"learning_rate": 3.351900393184797e-05, |
|
"loss": 2.4056, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 0.0266806735984103, |
|
"grad_norm": 4.724672794342041, |
|
"learning_rate": 3.453473132372215e-05, |
|
"loss": 2.3994, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 0.02746539929248119, |
|
"grad_norm": 4.745669364929199, |
|
"learning_rate": 3.555045871559633e-05, |
|
"loss": 2.3546, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.02825012498655208, |
|
"grad_norm": 4.4554948806762695, |
|
"learning_rate": 3.6566186107470514e-05, |
|
"loss": 2.3642, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.029034850680622972, |
|
"grad_norm": 4.4792304039001465, |
|
"learning_rate": 3.7581913499344695e-05, |
|
"loss": 2.3296, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 0.029819576374693863, |
|
"grad_norm": 3.9329679012298584, |
|
"learning_rate": 3.8597640891218876e-05, |
|
"loss": 2.3105, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 0.030604302068764754, |
|
"grad_norm": 4.338287830352783, |
|
"learning_rate": 3.9613368283093056e-05, |
|
"loss": 2.2811, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 0.03138902776283564, |
|
"grad_norm": 4.130499839782715, |
|
"learning_rate": 4.062909567496724e-05, |
|
"loss": 2.2898, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.03217375345690653, |
|
"grad_norm": 3.5664470195770264, |
|
"learning_rate": 4.164482306684142e-05, |
|
"loss": 2.2786, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 0.032958479150977424, |
|
"grad_norm": 3.642627716064453, |
|
"learning_rate": 4.26605504587156e-05, |
|
"loss": 2.2439, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 0.033743204845048315, |
|
"grad_norm": 3.7562780380249023, |
|
"learning_rate": 4.367627785058978e-05, |
|
"loss": 2.2441, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 0.034527930539119206, |
|
"grad_norm": 3.3117406368255615, |
|
"learning_rate": 4.469200524246396e-05, |
|
"loss": 2.2604, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 0.0353126562331901, |
|
"grad_norm": 3.4313724040985107, |
|
"learning_rate": 4.570773263433814e-05, |
|
"loss": 2.2069, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.03609738192726099, |
|
"grad_norm": 3.4720091819763184, |
|
"learning_rate": 4.672346002621232e-05, |
|
"loss": 2.2087, |
|
"step": 1426 |
|
}, |
|
{ |
|
"epoch": 0.03688210762133188, |
|
"grad_norm": 3.491856575012207, |
|
"learning_rate": 4.77391874180865e-05, |
|
"loss": 2.1808, |
|
"step": 1457 |
|
}, |
|
{ |
|
"epoch": 0.03766683331540277, |
|
"grad_norm": 3.3730666637420654, |
|
"learning_rate": 4.875491480996068e-05, |
|
"loss": 2.1907, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 0.03845155900947366, |
|
"grad_norm": 2.894322395324707, |
|
"learning_rate": 4.977064220183487e-05, |
|
"loss": 2.1689, |
|
"step": 1519 |
|
}, |
|
{ |
|
"epoch": 0.03923628470354455, |
|
"grad_norm": 3.195884943008423, |
|
"learning_rate": 4.9999915451558777e-05, |
|
"loss": 2.194, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.04002101039761544, |
|
"grad_norm": 3.154061794281006, |
|
"learning_rate": 4.999955597496219e-05, |
|
"loss": 2.1409, |
|
"step": 1581 |
|
}, |
|
{ |
|
"epoch": 0.040805736091686334, |
|
"grad_norm": 2.8204188346862793, |
|
"learning_rate": 4.9998914381774255e-05, |
|
"loss": 2.145, |
|
"step": 1612 |
|
}, |
|
{ |
|
"epoch": 0.041590461785757225, |
|
"grad_norm": 2.98260760307312, |
|
"learning_rate": 4.999799067923527e-05, |
|
"loss": 2.1523, |
|
"step": 1643 |
|
}, |
|
{ |
|
"epoch": 0.042375187479828116, |
|
"grad_norm": 2.917949914932251, |
|
"learning_rate": 4.999678487776908e-05, |
|
"loss": 2.1221, |
|
"step": 1674 |
|
}, |
|
{ |
|
"epoch": 0.04315991317389901, |
|
"grad_norm": 2.811469554901123, |
|
"learning_rate": 4.9995296990983006e-05, |
|
"loss": 2.1242, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 0.043944638867969905, |
|
"grad_norm": 3.067636728286743, |
|
"learning_rate": 4.999352703566763e-05, |
|
"loss": 2.1092, |
|
"step": 1736 |
|
}, |
|
{ |
|
"epoch": 0.044729364562040796, |
|
"grad_norm": 2.6231868267059326, |
|
"learning_rate": 4.999147503179668e-05, |
|
"loss": 2.1018, |
|
"step": 1767 |
|
}, |
|
{ |
|
"epoch": 0.04551409025611169, |
|
"grad_norm": 2.8247616291046143, |
|
"learning_rate": 4.998914100252672e-05, |
|
"loss": 2.074, |
|
"step": 1798 |
|
}, |
|
{ |
|
"epoch": 0.04629881595018258, |
|
"grad_norm": 2.5960075855255127, |
|
"learning_rate": 4.998652497419696e-05, |
|
"loss": 2.0824, |
|
"step": 1829 |
|
}, |
|
{ |
|
"epoch": 0.04708354164425347, |
|
"grad_norm": 2.7796943187713623, |
|
"learning_rate": 4.9983626976328927e-05, |
|
"loss": 2.0998, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.04786826733832436, |
|
"grad_norm": 2.49242901802063, |
|
"learning_rate": 4.998044704162613e-05, |
|
"loss": 2.0893, |
|
"step": 1891 |
|
}, |
|
{ |
|
"epoch": 0.04865299303239525, |
|
"grad_norm": 2.4294378757476807, |
|
"learning_rate": 4.9976985205973705e-05, |
|
"loss": 2.0617, |
|
"step": 1922 |
|
}, |
|
{ |
|
"epoch": 0.04943771872646614, |
|
"grad_norm": 2.553217649459839, |
|
"learning_rate": 4.997324150843799e-05, |
|
"loss": 2.0632, |
|
"step": 1953 |
|
}, |
|
{ |
|
"epoch": 0.050222444420537034, |
|
"grad_norm": 2.6711318492889404, |
|
"learning_rate": 4.99692159912661e-05, |
|
"loss": 2.0445, |
|
"step": 1984 |
|
}, |
|
{ |
|
"epoch": 0.051007170114607925, |
|
"grad_norm": 2.714432716369629, |
|
"learning_rate": 4.996490869988546e-05, |
|
"loss": 2.0185, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 0.051791895808678816, |
|
"grad_norm": 2.6516053676605225, |
|
"learning_rate": 4.996031968290326e-05, |
|
"loss": 2.057, |
|
"step": 2046 |
|
}, |
|
{ |
|
"epoch": 0.05257662150274971, |
|
"grad_norm": 2.4798831939697266, |
|
"learning_rate": 4.995544899210594e-05, |
|
"loss": 2.0199, |
|
"step": 2077 |
|
}, |
|
{ |
|
"epoch": 0.0533613471968206, |
|
"grad_norm": 2.5150041580200195, |
|
"learning_rate": 4.9950296682458583e-05, |
|
"loss": 2.0264, |
|
"step": 2108 |
|
}, |
|
{ |
|
"epoch": 0.05414607289089149, |
|
"grad_norm": 2.637777805328369, |
|
"learning_rate": 4.994486281210429e-05, |
|
"loss": 2.0233, |
|
"step": 2139 |
|
}, |
|
{ |
|
"epoch": 0.05493079858496238, |
|
"grad_norm": 2.330376148223877, |
|
"learning_rate": 4.9939147442363566e-05, |
|
"loss": 2.0201, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.05571552427903327, |
|
"grad_norm": 2.3436174392700195, |
|
"learning_rate": 4.9933150637733574e-05, |
|
"loss": 1.9865, |
|
"step": 2201 |
|
}, |
|
{ |
|
"epoch": 0.05650024997310416, |
|
"grad_norm": 2.7756845951080322, |
|
"learning_rate": 4.992687246588743e-05, |
|
"loss": 1.9983, |
|
"step": 2232 |
|
}, |
|
{ |
|
"epoch": 0.05728497566717505, |
|
"grad_norm": 2.1725504398345947, |
|
"learning_rate": 4.992031299767347e-05, |
|
"loss": 1.9689, |
|
"step": 2263 |
|
}, |
|
{ |
|
"epoch": 0.058069701361245944, |
|
"grad_norm": 2.2163312435150146, |
|
"learning_rate": 4.9913472307114386e-05, |
|
"loss": 1.9829, |
|
"step": 2294 |
|
}, |
|
{ |
|
"epoch": 0.058854427055316835, |
|
"grad_norm": 2.2829232215881348, |
|
"learning_rate": 4.9906350471406446e-05, |
|
"loss": 2.0142, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.059639152749387726, |
|
"grad_norm": 2.239596366882324, |
|
"learning_rate": 4.989894757091861e-05, |
|
"loss": 1.9697, |
|
"step": 2356 |
|
}, |
|
{ |
|
"epoch": 0.06042387844345862, |
|
"grad_norm": 2.2926037311553955, |
|
"learning_rate": 4.989126368919158e-05, |
|
"loss": 1.9688, |
|
"step": 2387 |
|
}, |
|
{ |
|
"epoch": 0.06120860413752951, |
|
"grad_norm": 10.08767032623291, |
|
"learning_rate": 4.988329891293693e-05, |
|
"loss": 1.9845, |
|
"step": 2418 |
|
}, |
|
{ |
|
"epoch": 0.0619933298316004, |
|
"grad_norm": 2.2427194118499756, |
|
"learning_rate": 4.987505333203608e-05, |
|
"loss": 1.9744, |
|
"step": 2449 |
|
}, |
|
{ |
|
"epoch": 0.06277805552567128, |
|
"grad_norm": 2.5111870765686035, |
|
"learning_rate": 4.9866527039539276e-05, |
|
"loss": 1.9526, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.06356278121974218, |
|
"grad_norm": 2.2100026607513428, |
|
"learning_rate": 4.9857720131664594e-05, |
|
"loss": 1.9826, |
|
"step": 2511 |
|
}, |
|
{ |
|
"epoch": 0.06434750691381307, |
|
"grad_norm": 2.2112088203430176, |
|
"learning_rate": 4.9848632707796773e-05, |
|
"loss": 1.9698, |
|
"step": 2542 |
|
}, |
|
{ |
|
"epoch": 0.06513223260788396, |
|
"grad_norm": 2.404014825820923, |
|
"learning_rate": 4.9839264870486155e-05, |
|
"loss": 1.9628, |
|
"step": 2573 |
|
}, |
|
{ |
|
"epoch": 0.06591695830195485, |
|
"grad_norm": 2.526423692703247, |
|
"learning_rate": 4.9829616725447526e-05, |
|
"loss": 1.9481, |
|
"step": 2604 |
|
}, |
|
{ |
|
"epoch": 0.06670168399602575, |
|
"grad_norm": 2.2506027221679688, |
|
"learning_rate": 4.981968838155888e-05, |
|
"loss": 1.9418, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 0.06748640969009663, |
|
"grad_norm": 2.4334371089935303, |
|
"learning_rate": 4.980947995086024e-05, |
|
"loss": 1.9423, |
|
"step": 2666 |
|
}, |
|
{ |
|
"epoch": 0.06827113538416753, |
|
"grad_norm": 2.3028314113616943, |
|
"learning_rate": 4.979899154855234e-05, |
|
"loss": 1.9391, |
|
"step": 2697 |
|
}, |
|
{ |
|
"epoch": 0.06905586107823841, |
|
"grad_norm": 2.122143030166626, |
|
"learning_rate": 4.9788223292995386e-05, |
|
"loss": 1.933, |
|
"step": 2728 |
|
}, |
|
{ |
|
"epoch": 0.06984058677230931, |
|
"grad_norm": 2.1335129737854004, |
|
"learning_rate": 4.977717530570768e-05, |
|
"loss": 1.9212, |
|
"step": 2759 |
|
}, |
|
{ |
|
"epoch": 0.0706253124663802, |
|
"grad_norm": 2.198650598526001, |
|
"learning_rate": 4.976584771136425e-05, |
|
"loss": 1.9217, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.07141003816045109, |
|
"grad_norm": 2.4985201358795166, |
|
"learning_rate": 4.975424063779547e-05, |
|
"loss": 1.9277, |
|
"step": 2821 |
|
}, |
|
{ |
|
"epoch": 0.07219476385452198, |
|
"grad_norm": 1.9877598285675049, |
|
"learning_rate": 4.974235421598557e-05, |
|
"loss": 1.9278, |
|
"step": 2852 |
|
}, |
|
{ |
|
"epoch": 0.07297948954859287, |
|
"grad_norm": 3.0082573890686035, |
|
"learning_rate": 4.973018858007122e-05, |
|
"loss": 1.9261, |
|
"step": 2883 |
|
}, |
|
{ |
|
"epoch": 0.07376421524266376, |
|
"grad_norm": 2.139742851257324, |
|
"learning_rate": 4.9717743867339963e-05, |
|
"loss": 1.9168, |
|
"step": 2914 |
|
}, |
|
{ |
|
"epoch": 0.07454894093673466, |
|
"grad_norm": 2.1748037338256836, |
|
"learning_rate": 4.9705020218228695e-05, |
|
"loss": 1.9132, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 0.07533366663080554, |
|
"grad_norm": 2.0570950508117676, |
|
"learning_rate": 4.969201777632205e-05, |
|
"loss": 1.9177, |
|
"step": 2976 |
|
}, |
|
{ |
|
"epoch": 0.07611839232487644, |
|
"grad_norm": 1.9970216751098633, |
|
"learning_rate": 4.9678736688350846e-05, |
|
"loss": 1.9105, |
|
"step": 3007 |
|
}, |
|
{ |
|
"epoch": 0.07690311801894732, |
|
"grad_norm": 1.9640527963638306, |
|
"learning_rate": 4.966517710419033e-05, |
|
"loss": 1.9084, |
|
"step": 3038 |
|
}, |
|
{ |
|
"epoch": 0.07768784371301822, |
|
"grad_norm": 2.172874927520752, |
|
"learning_rate": 4.965133917685858e-05, |
|
"loss": 1.8995, |
|
"step": 3069 |
|
}, |
|
{ |
|
"epoch": 0.0784725694070891, |
|
"grad_norm": 2.1881916522979736, |
|
"learning_rate": 4.9637223062514714e-05, |
|
"loss": 1.9019, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.07925729510116, |
|
"grad_norm": 1.975496530532837, |
|
"learning_rate": 4.962282892045718e-05, |
|
"loss": 1.8967, |
|
"step": 3131 |
|
}, |
|
{ |
|
"epoch": 0.08004202079523089, |
|
"grad_norm": 2.0970685482025146, |
|
"learning_rate": 4.9608156913121904e-05, |
|
"loss": 1.8867, |
|
"step": 3162 |
|
}, |
|
{ |
|
"epoch": 0.08082674648930178, |
|
"grad_norm": 2.096353769302368, |
|
"learning_rate": 4.959320720608049e-05, |
|
"loss": 1.8967, |
|
"step": 3193 |
|
}, |
|
{ |
|
"epoch": 0.08161147218337267, |
|
"grad_norm": 1.998336911201477, |
|
"learning_rate": 4.9577979968038354e-05, |
|
"loss": 1.8876, |
|
"step": 3224 |
|
}, |
|
{ |
|
"epoch": 0.08239619787744357, |
|
"grad_norm": 2.098055362701416, |
|
"learning_rate": 4.956247537083282e-05, |
|
"loss": 1.9, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 0.08318092357151445, |
|
"grad_norm": 2.0739505290985107, |
|
"learning_rate": 4.9546693589431145e-05, |
|
"loss": 1.8902, |
|
"step": 3286 |
|
}, |
|
{ |
|
"epoch": 0.08396564926558535, |
|
"grad_norm": 1.9556243419647217, |
|
"learning_rate": 4.9530634801928595e-05, |
|
"loss": 1.888, |
|
"step": 3317 |
|
}, |
|
{ |
|
"epoch": 0.08475037495965623, |
|
"grad_norm": 2.096874952316284, |
|
"learning_rate": 4.9514299189546395e-05, |
|
"loss": 1.8785, |
|
"step": 3348 |
|
}, |
|
{ |
|
"epoch": 0.08553510065372713, |
|
"grad_norm": 1.9407072067260742, |
|
"learning_rate": 4.949768693662973e-05, |
|
"loss": 1.8646, |
|
"step": 3379 |
|
}, |
|
{ |
|
"epoch": 0.08631982634779801, |
|
"grad_norm": 1.9928467273712158, |
|
"learning_rate": 4.948079823064559e-05, |
|
"loss": 1.8751, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.08710455204186891, |
|
"grad_norm": 1.9670037031173706, |
|
"learning_rate": 4.946363326218074e-05, |
|
"loss": 1.8831, |
|
"step": 3441 |
|
}, |
|
{ |
|
"epoch": 0.08788927773593981, |
|
"grad_norm": 1.999193787574768, |
|
"learning_rate": 4.9446192224939525e-05, |
|
"loss": 1.8605, |
|
"step": 3472 |
|
}, |
|
{ |
|
"epoch": 0.0886740034300107, |
|
"grad_norm": 1.9073724746704102, |
|
"learning_rate": 4.942847531574167e-05, |
|
"loss": 1.8576, |
|
"step": 3503 |
|
}, |
|
{ |
|
"epoch": 0.08945872912408159, |
|
"grad_norm": 2.179824113845825, |
|
"learning_rate": 4.941048273452008e-05, |
|
"loss": 1.8682, |
|
"step": 3534 |
|
}, |
|
{ |
|
"epoch": 0.09024345481815248, |
|
"grad_norm": 1.954990029335022, |
|
"learning_rate": 4.9392214684318605e-05, |
|
"loss": 1.8807, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 0.09102818051222338, |
|
"grad_norm": 1.7695640325546265, |
|
"learning_rate": 4.93736713712897e-05, |
|
"loss": 1.879, |
|
"step": 3596 |
|
}, |
|
{ |
|
"epoch": 0.09181290620629426, |
|
"grad_norm": 1.7708550691604614, |
|
"learning_rate": 4.9354853004692124e-05, |
|
"loss": 1.8677, |
|
"step": 3627 |
|
}, |
|
{ |
|
"epoch": 0.09259763190036516, |
|
"grad_norm": 1.9683934450149536, |
|
"learning_rate": 4.93357597968886e-05, |
|
"loss": 1.8595, |
|
"step": 3658 |
|
}, |
|
{ |
|
"epoch": 0.09338235759443604, |
|
"grad_norm": 2.00441312789917, |
|
"learning_rate": 4.931639196334338e-05, |
|
"loss": 1.8462, |
|
"step": 3689 |
|
}, |
|
{ |
|
"epoch": 0.09416708328850694, |
|
"grad_norm": 1.875543475151062, |
|
"learning_rate": 4.9296749722619826e-05, |
|
"loss": 1.8502, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.09495180898257782, |
|
"grad_norm": 1.932658314704895, |
|
"learning_rate": 4.9276833296377966e-05, |
|
"loss": 1.8457, |
|
"step": 3751 |
|
}, |
|
{ |
|
"epoch": 0.09573653467664872, |
|
"grad_norm": 1.9957045316696167, |
|
"learning_rate": 4.925664290937196e-05, |
|
"loss": 1.843, |
|
"step": 3782 |
|
}, |
|
{ |
|
"epoch": 0.0965212603707196, |
|
"grad_norm": 1.8579176664352417, |
|
"learning_rate": 4.9236178789447576e-05, |
|
"loss": 1.8504, |
|
"step": 3813 |
|
}, |
|
{ |
|
"epoch": 0.0973059860647905, |
|
"grad_norm": 1.9646131992340088, |
|
"learning_rate": 4.921544116753962e-05, |
|
"loss": 1.8512, |
|
"step": 3844 |
|
}, |
|
{ |
|
"epoch": 0.09809071175886139, |
|
"grad_norm": 1.8213136196136475, |
|
"learning_rate": 4.919443027766935e-05, |
|
"loss": 1.8618, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 0.09887543745293229, |
|
"grad_norm": 2.017280101776123, |
|
"learning_rate": 4.91731463569418e-05, |
|
"loss": 1.863, |
|
"step": 3906 |
|
}, |
|
{ |
|
"epoch": 0.09966016314700317, |
|
"grad_norm": 1.9125665426254272, |
|
"learning_rate": 4.915158964554312e-05, |
|
"loss": 1.8259, |
|
"step": 3937 |
|
}, |
|
{ |
|
"epoch": 0.10044488884107407, |
|
"grad_norm": 2.0414695739746094, |
|
"learning_rate": 4.912976038673786e-05, |
|
"loss": 1.8347, |
|
"step": 3968 |
|
}, |
|
{ |
|
"epoch": 0.10122961453514495, |
|
"grad_norm": 1.7705485820770264, |
|
"learning_rate": 4.9107658826866254e-05, |
|
"loss": 1.8502, |
|
"step": 3999 |
|
}, |
|
{ |
|
"epoch": 0.10201434022921585, |
|
"grad_norm": 1.8961102962493896, |
|
"learning_rate": 4.908528521534139e-05, |
|
"loss": 1.84, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.10279906592328673, |
|
"grad_norm": 1.784387230873108, |
|
"learning_rate": 4.906263980464644e-05, |
|
"loss": 1.842, |
|
"step": 4061 |
|
}, |
|
{ |
|
"epoch": 0.10358379161735763, |
|
"grad_norm": 11.229472160339355, |
|
"learning_rate": 4.903972285033178e-05, |
|
"loss": 1.8476, |
|
"step": 4092 |
|
}, |
|
{ |
|
"epoch": 0.10436851731142852, |
|
"grad_norm": 1.9657154083251953, |
|
"learning_rate": 4.901653461101213e-05, |
|
"loss": 1.8465, |
|
"step": 4123 |
|
}, |
|
{ |
|
"epoch": 0.10515324300549941, |
|
"grad_norm": 1.7702244520187378, |
|
"learning_rate": 4.8993075348363626e-05, |
|
"loss": 1.8249, |
|
"step": 4154 |
|
}, |
|
{ |
|
"epoch": 0.1059379686995703, |
|
"grad_norm": 1.8672112226486206, |
|
"learning_rate": 4.896934532712084e-05, |
|
"loss": 1.8232, |
|
"step": 4185 |
|
}, |
|
{ |
|
"epoch": 0.1067226943936412, |
|
"grad_norm": 1.7806147336959839, |
|
"learning_rate": 4.8945344815073846e-05, |
|
"loss": 1.8256, |
|
"step": 4216 |
|
}, |
|
{ |
|
"epoch": 0.10750742008771208, |
|
"grad_norm": 1.7830456495285034, |
|
"learning_rate": 4.892107408306516e-05, |
|
"loss": 1.8271, |
|
"step": 4247 |
|
}, |
|
{ |
|
"epoch": 0.10829214578178298, |
|
"grad_norm": 1.96640944480896, |
|
"learning_rate": 4.889653340498669e-05, |
|
"loss": 1.82, |
|
"step": 4278 |
|
}, |
|
{ |
|
"epoch": 0.10907687147585386, |
|
"grad_norm": 1.8224470615386963, |
|
"learning_rate": 4.8871723057776664e-05, |
|
"loss": 1.8216, |
|
"step": 4309 |
|
}, |
|
{ |
|
"epoch": 0.10986159716992476, |
|
"grad_norm": 2.5164501667022705, |
|
"learning_rate": 4.8846643321416476e-05, |
|
"loss": 1.8252, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.11064632286399564, |
|
"grad_norm": 1.7248613834381104, |
|
"learning_rate": 4.882129447892753e-05, |
|
"loss": 1.8133, |
|
"step": 4371 |
|
}, |
|
{ |
|
"epoch": 0.11143104855806654, |
|
"grad_norm": 2.060304880142212, |
|
"learning_rate": 4.8795676816368076e-05, |
|
"loss": 1.8282, |
|
"step": 4402 |
|
}, |
|
{ |
|
"epoch": 0.11221577425213743, |
|
"grad_norm": 1.8709039688110352, |
|
"learning_rate": 4.876979062282995e-05, |
|
"loss": 1.8154, |
|
"step": 4433 |
|
}, |
|
{ |
|
"epoch": 0.11300049994620832, |
|
"grad_norm": 1.7444674968719482, |
|
"learning_rate": 4.8743636190435325e-05, |
|
"loss": 1.8173, |
|
"step": 4464 |
|
}, |
|
{ |
|
"epoch": 0.11378522564027921, |
|
"grad_norm": 1.7357319593429565, |
|
"learning_rate": 4.871721381433344e-05, |
|
"loss": 1.8351, |
|
"step": 4495 |
|
}, |
|
{ |
|
"epoch": 0.1145699513343501, |
|
"grad_norm": 1.728070855140686, |
|
"learning_rate": 4.869052379269719e-05, |
|
"loss": 1.8119, |
|
"step": 4526 |
|
}, |
|
{ |
|
"epoch": 0.11535467702842099, |
|
"grad_norm": 1.742035984992981, |
|
"learning_rate": 4.866356642671985e-05, |
|
"loss": 1.7967, |
|
"step": 4557 |
|
}, |
|
{ |
|
"epoch": 0.11613940272249189, |
|
"grad_norm": 1.7010915279388428, |
|
"learning_rate": 4.8636342020611634e-05, |
|
"loss": 1.8004, |
|
"step": 4588 |
|
}, |
|
{ |
|
"epoch": 0.11692412841656277, |
|
"grad_norm": 1.6775914430618286, |
|
"learning_rate": 4.860885088159626e-05, |
|
"loss": 1.8173, |
|
"step": 4619 |
|
}, |
|
{ |
|
"epoch": 0.11770885411063367, |
|
"grad_norm": 1.9107964038848877, |
|
"learning_rate": 4.858109331990751e-05, |
|
"loss": 1.7984, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.11849357980470455, |
|
"grad_norm": 1.713429570198059, |
|
"learning_rate": 4.855306964878567e-05, |
|
"loss": 1.7967, |
|
"step": 4681 |
|
}, |
|
{ |
|
"epoch": 0.11927830549877545, |
|
"grad_norm": 1.9373931884765625, |
|
"learning_rate": 4.8524780184474084e-05, |
|
"loss": 1.8072, |
|
"step": 4712 |
|
}, |
|
{ |
|
"epoch": 0.12006303119284634, |
|
"grad_norm": 1.8975365161895752, |
|
"learning_rate": 4.8496225246215496e-05, |
|
"loss": 1.8121, |
|
"step": 4743 |
|
}, |
|
{ |
|
"epoch": 0.12084775688691723, |
|
"grad_norm": 5.285326957702637, |
|
"learning_rate": 4.8467405156248505e-05, |
|
"loss": 1.8189, |
|
"step": 4774 |
|
}, |
|
{ |
|
"epoch": 0.12163248258098812, |
|
"grad_norm": 1.7155263423919678, |
|
"learning_rate": 4.843832023980392e-05, |
|
"loss": 1.8093, |
|
"step": 4805 |
|
}, |
|
{ |
|
"epoch": 0.12241720827505902, |
|
"grad_norm": 1.726831316947937, |
|
"learning_rate": 4.840897082510106e-05, |
|
"loss": 1.7952, |
|
"step": 4836 |
|
}, |
|
{ |
|
"epoch": 0.1232019339691299, |
|
"grad_norm": 1.739639401435852, |
|
"learning_rate": 4.8379357243344084e-05, |
|
"loss": 1.8103, |
|
"step": 4867 |
|
}, |
|
{ |
|
"epoch": 0.1239866596632008, |
|
"grad_norm": 1.6978296041488647, |
|
"learning_rate": 4.8349479828718236e-05, |
|
"loss": 1.8006, |
|
"step": 4898 |
|
}, |
|
{ |
|
"epoch": 0.12477138535727168, |
|
"grad_norm": 1.7154194116592407, |
|
"learning_rate": 4.8319338918386075e-05, |
|
"loss": 1.7876, |
|
"step": 4929 |
|
}, |
|
{ |
|
"epoch": 0.12555611105134257, |
|
"grad_norm": 1.6323316097259521, |
|
"learning_rate": 4.828893485248369e-05, |
|
"loss": 1.8159, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.12634083674541347, |
|
"grad_norm": 1.641784429550171, |
|
"learning_rate": 4.825826797411682e-05, |
|
"loss": 1.7959, |
|
"step": 4991 |
|
}, |
|
{ |
|
"epoch": 0.12712556243948436, |
|
"grad_norm": 1.6947154998779297, |
|
"learning_rate": 4.822733862935702e-05, |
|
"loss": 1.7895, |
|
"step": 5022 |
|
}, |
|
{ |
|
"epoch": 0.12791028813355526, |
|
"grad_norm": 1.6331220865249634, |
|
"learning_rate": 4.819614716723775e-05, |
|
"loss": 1.7707, |
|
"step": 5053 |
|
}, |
|
{ |
|
"epoch": 0.12869501382762613, |
|
"grad_norm": 1.8207937479019165, |
|
"learning_rate": 4.8164693939750425e-05, |
|
"loss": 1.8123, |
|
"step": 5084 |
|
}, |
|
{ |
|
"epoch": 0.12947973952169703, |
|
"grad_norm": 1.6664263010025024, |
|
"learning_rate": 4.813297930184042e-05, |
|
"loss": 1.8089, |
|
"step": 5115 |
|
}, |
|
{ |
|
"epoch": 0.13026446521576793, |
|
"grad_norm": 1.9931398630142212, |
|
"learning_rate": 4.810100361140314e-05, |
|
"loss": 1.7757, |
|
"step": 5146 |
|
}, |
|
{ |
|
"epoch": 0.13104919090983883, |
|
"grad_norm": 1.839200735092163, |
|
"learning_rate": 4.8068767229279885e-05, |
|
"loss": 1.7969, |
|
"step": 5177 |
|
}, |
|
{ |
|
"epoch": 0.1318339166039097, |
|
"grad_norm": 1.781187653541565, |
|
"learning_rate": 4.8036270519253854e-05, |
|
"loss": 1.7937, |
|
"step": 5208 |
|
}, |
|
{ |
|
"epoch": 0.1326186422979806, |
|
"grad_norm": 1.7144343852996826, |
|
"learning_rate": 4.8003513848046e-05, |
|
"loss": 1.7816, |
|
"step": 5239 |
|
}, |
|
{ |
|
"epoch": 0.1334033679920515, |
|
"grad_norm": 1.6819554567337036, |
|
"learning_rate": 4.79704975853109e-05, |
|
"loss": 1.7851, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.1341880936861224, |
|
"grad_norm": 1.6748546361923218, |
|
"learning_rate": 4.793722210363262e-05, |
|
"loss": 1.7941, |
|
"step": 5301 |
|
}, |
|
{ |
|
"epoch": 0.13497281938019326, |
|
"grad_norm": 1.615569829940796, |
|
"learning_rate": 4.7903687778520414e-05, |
|
"loss": 1.7799, |
|
"step": 5332 |
|
}, |
|
{ |
|
"epoch": 0.13575754507426416, |
|
"grad_norm": 1.7959198951721191, |
|
"learning_rate": 4.7869894988404593e-05, |
|
"loss": 1.7802, |
|
"step": 5363 |
|
}, |
|
{ |
|
"epoch": 0.13654227076833506, |
|
"grad_norm": 1.598946452140808, |
|
"learning_rate": 4.783584411463221e-05, |
|
"loss": 1.7929, |
|
"step": 5394 |
|
}, |
|
{ |
|
"epoch": 0.13732699646240595, |
|
"grad_norm": 1.793511986732483, |
|
"learning_rate": 4.780153554146274e-05, |
|
"loss": 1.7591, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 0.13811172215647682, |
|
"grad_norm": 1.718671202659607, |
|
"learning_rate": 4.7766969656063766e-05, |
|
"loss": 1.7807, |
|
"step": 5456 |
|
}, |
|
{ |
|
"epoch": 0.13889644785054772, |
|
"grad_norm": 1.6548669338226318, |
|
"learning_rate": 4.773214684850662e-05, |
|
"loss": 1.775, |
|
"step": 5487 |
|
}, |
|
{ |
|
"epoch": 0.13968117354461862, |
|
"grad_norm": 1.6727256774902344, |
|
"learning_rate": 4.769706751176193e-05, |
|
"loss": 1.7756, |
|
"step": 5518 |
|
}, |
|
{ |
|
"epoch": 0.14046589923868952, |
|
"grad_norm": 1.7169344425201416, |
|
"learning_rate": 4.7661732041695264e-05, |
|
"loss": 1.7887, |
|
"step": 5549 |
|
}, |
|
{ |
|
"epoch": 0.1412506249327604, |
|
"grad_norm": 1.6376421451568604, |
|
"learning_rate": 4.762614083706258e-05, |
|
"loss": 1.7939, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.14203535062683129, |
|
"grad_norm": 1.7083207368850708, |
|
"learning_rate": 4.759029429950581e-05, |
|
"loss": 1.7705, |
|
"step": 5611 |
|
}, |
|
{ |
|
"epoch": 0.14282007632090218, |
|
"grad_norm": 1.6359349489212036, |
|
"learning_rate": 4.7554192833548235e-05, |
|
"loss": 1.7732, |
|
"step": 5642 |
|
}, |
|
{ |
|
"epoch": 0.14360480201497308, |
|
"grad_norm": 1.684005618095398, |
|
"learning_rate": 4.751783684659e-05, |
|
"loss": 1.7766, |
|
"step": 5673 |
|
}, |
|
{ |
|
"epoch": 0.14438952770904395, |
|
"grad_norm": 1.7531359195709229, |
|
"learning_rate": 4.748122674890348e-05, |
|
"loss": 1.7815, |
|
"step": 5704 |
|
}, |
|
{ |
|
"epoch": 0.14517425340311485, |
|
"grad_norm": 1.5898247957229614, |
|
"learning_rate": 4.7444362953628654e-05, |
|
"loss": 1.7837, |
|
"step": 5735 |
|
}, |
|
{ |
|
"epoch": 0.14595897909718575, |
|
"grad_norm": 1.6781623363494873, |
|
"learning_rate": 4.7407245876768424e-05, |
|
"loss": 1.7381, |
|
"step": 5766 |
|
}, |
|
{ |
|
"epoch": 0.14674370479125665, |
|
"grad_norm": 1.6126357316970825, |
|
"learning_rate": 4.736987593718397e-05, |
|
"loss": 1.7714, |
|
"step": 5797 |
|
}, |
|
{ |
|
"epoch": 0.14752843048532752, |
|
"grad_norm": 1.6623587608337402, |
|
"learning_rate": 4.733225355658999e-05, |
|
"loss": 1.7625, |
|
"step": 5828 |
|
}, |
|
{ |
|
"epoch": 0.14831315617939841, |
|
"grad_norm": 1.6715524196624756, |
|
"learning_rate": 4.7294379159549926e-05, |
|
"loss": 1.7631, |
|
"step": 5859 |
|
}, |
|
{ |
|
"epoch": 0.1490978818734693, |
|
"grad_norm": 1.6739026308059692, |
|
"learning_rate": 4.725625317347119e-05, |
|
"loss": 1.775, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 0.1498826075675402, |
|
"grad_norm": 1.8141075372695923, |
|
"learning_rate": 4.7217876028600374e-05, |
|
"loss": 1.7881, |
|
"step": 5921 |
|
}, |
|
{ |
|
"epoch": 0.15066733326161108, |
|
"grad_norm": 1.6842069625854492, |
|
"learning_rate": 4.717924815801832e-05, |
|
"loss": 1.7707, |
|
"step": 5952 |
|
}, |
|
{ |
|
"epoch": 0.15145205895568198, |
|
"grad_norm": 1.7032698392868042, |
|
"learning_rate": 4.714036999763532e-05, |
|
"loss": 1.7631, |
|
"step": 5983 |
|
}, |
|
{ |
|
"epoch": 0.15223678464975288, |
|
"grad_norm": 1.7856013774871826, |
|
"learning_rate": 4.7101241986186116e-05, |
|
"loss": 1.7545, |
|
"step": 6014 |
|
}, |
|
{ |
|
"epoch": 0.15302151034382377, |
|
"grad_norm": 1.679623007774353, |
|
"learning_rate": 4.7061864565225e-05, |
|
"loss": 1.7676, |
|
"step": 6045 |
|
}, |
|
{ |
|
"epoch": 0.15380623603789464, |
|
"grad_norm": 1.626792073249817, |
|
"learning_rate": 4.702223817912081e-05, |
|
"loss": 1.7434, |
|
"step": 6076 |
|
}, |
|
{ |
|
"epoch": 0.15459096173196554, |
|
"grad_norm": 1.850042700767517, |
|
"learning_rate": 4.698236327505195e-05, |
|
"loss": 1.7805, |
|
"step": 6107 |
|
}, |
|
{ |
|
"epoch": 0.15537568742603644, |
|
"grad_norm": 1.6403062343597412, |
|
"learning_rate": 4.694224030300127e-05, |
|
"loss": 1.7495, |
|
"step": 6138 |
|
}, |
|
{ |
|
"epoch": 0.15616041312010734, |
|
"grad_norm": 1.5897477865219116, |
|
"learning_rate": 4.690186971575107e-05, |
|
"loss": 1.779, |
|
"step": 6169 |
|
}, |
|
{ |
|
"epoch": 0.1569451388141782, |
|
"grad_norm": 1.8173433542251587, |
|
"learning_rate": 4.6861251968877916e-05, |
|
"loss": 1.7705, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.1577298645082491, |
|
"grad_norm": 1.788022756576538, |
|
"learning_rate": 4.68203875207476e-05, |
|
"loss": 1.7457, |
|
"step": 6231 |
|
}, |
|
{ |
|
"epoch": 0.15851459020232, |
|
"grad_norm": 1.6219838857650757, |
|
"learning_rate": 4.677927683250983e-05, |
|
"loss": 1.7758, |
|
"step": 6262 |
|
}, |
|
{ |
|
"epoch": 0.1592993158963909, |
|
"grad_norm": 1.678890347480774, |
|
"learning_rate": 4.6737920368093156e-05, |
|
"loss": 1.7394, |
|
"step": 6293 |
|
}, |
|
{ |
|
"epoch": 0.16008404159046177, |
|
"grad_norm": 1.5719743967056274, |
|
"learning_rate": 4.669631859419965e-05, |
|
"loss": 1.7549, |
|
"step": 6324 |
|
}, |
|
{ |
|
"epoch": 0.16086876728453267, |
|
"grad_norm": 1.6332769393920898, |
|
"learning_rate": 4.6654471980299676e-05, |
|
"loss": 1.7462, |
|
"step": 6355 |
|
}, |
|
{ |
|
"epoch": 0.16165349297860357, |
|
"grad_norm": 1.6942561864852905, |
|
"learning_rate": 4.661238099862658e-05, |
|
"loss": 1.7506, |
|
"step": 6386 |
|
}, |
|
{ |
|
"epoch": 0.16243821867267447, |
|
"grad_norm": 1.8173885345458984, |
|
"learning_rate": 4.657004612417138e-05, |
|
"loss": 1.7455, |
|
"step": 6417 |
|
}, |
|
{ |
|
"epoch": 0.16322294436674534, |
|
"grad_norm": 1.6209042072296143, |
|
"learning_rate": 4.6527467834677374e-05, |
|
"loss": 1.7413, |
|
"step": 6448 |
|
}, |
|
{ |
|
"epoch": 0.16400767006081624, |
|
"grad_norm": 1.5801094770431519, |
|
"learning_rate": 4.648464661063478e-05, |
|
"loss": 1.7491, |
|
"step": 6479 |
|
}, |
|
{ |
|
"epoch": 0.16479239575488713, |
|
"grad_norm": 1.5499264001846313, |
|
"learning_rate": 4.6441582935275264e-05, |
|
"loss": 1.7276, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 0.16557712144895803, |
|
"grad_norm": 1.6154171228408813, |
|
"learning_rate": 4.6398277294566586e-05, |
|
"loss": 1.7816, |
|
"step": 6541 |
|
}, |
|
{ |
|
"epoch": 0.1663618471430289, |
|
"grad_norm": 1.5633410215377808, |
|
"learning_rate": 4.6354730177207e-05, |
|
"loss": 1.7447, |
|
"step": 6572 |
|
}, |
|
{ |
|
"epoch": 0.1671465728370998, |
|
"grad_norm": 1.7070655822753906, |
|
"learning_rate": 4.6310942074619787e-05, |
|
"loss": 1.7477, |
|
"step": 6603 |
|
}, |
|
{ |
|
"epoch": 0.1679312985311707, |
|
"grad_norm": 1.7502373456954956, |
|
"learning_rate": 4.626691348094777e-05, |
|
"loss": 1.74, |
|
"step": 6634 |
|
}, |
|
{ |
|
"epoch": 0.1687160242252416, |
|
"grad_norm": 1.9541263580322266, |
|
"learning_rate": 4.622264489304762e-05, |
|
"loss": 1.7389, |
|
"step": 6665 |
|
}, |
|
{ |
|
"epoch": 0.16950074991931247, |
|
"grad_norm": 1.64599609375, |
|
"learning_rate": 4.617813681048434e-05, |
|
"loss": 1.7445, |
|
"step": 6696 |
|
}, |
|
{ |
|
"epoch": 0.17028547561338336, |
|
"grad_norm": 1.9360859394073486, |
|
"learning_rate": 4.61333897355256e-05, |
|
"loss": 1.73, |
|
"step": 6727 |
|
}, |
|
{ |
|
"epoch": 0.17107020130745426, |
|
"grad_norm": 1.693892240524292, |
|
"learning_rate": 4.608840417313604e-05, |
|
"loss": 1.7229, |
|
"step": 6758 |
|
}, |
|
{ |
|
"epoch": 0.17185492700152516, |
|
"grad_norm": 1.6243150234222412, |
|
"learning_rate": 4.6043180630971646e-05, |
|
"loss": 1.7421, |
|
"step": 6789 |
|
}, |
|
{ |
|
"epoch": 0.17263965269559603, |
|
"grad_norm": 1.5926107168197632, |
|
"learning_rate": 4.599771961937391e-05, |
|
"loss": 1.7447, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 0.17342437838966693, |
|
"grad_norm": 1.695167064666748, |
|
"learning_rate": 4.5952021651364204e-05, |
|
"loss": 1.7463, |
|
"step": 6851 |
|
}, |
|
{ |
|
"epoch": 0.17420910408373783, |
|
"grad_norm": 1.5915182828903198, |
|
"learning_rate": 4.590608724263786e-05, |
|
"loss": 1.7198, |
|
"step": 6882 |
|
}, |
|
{ |
|
"epoch": 0.17499382977780872, |
|
"grad_norm": 1.6135920286178589, |
|
"learning_rate": 4.585991691155845e-05, |
|
"loss": 1.7233, |
|
"step": 6913 |
|
}, |
|
{ |
|
"epoch": 0.17577855547187962, |
|
"grad_norm": 1.5855350494384766, |
|
"learning_rate": 4.581351117915188e-05, |
|
"loss": 1.7519, |
|
"step": 6944 |
|
}, |
|
{ |
|
"epoch": 0.1765632811659505, |
|
"grad_norm": 1.5782060623168945, |
|
"learning_rate": 4.5766870569100534e-05, |
|
"loss": 1.729, |
|
"step": 6975 |
|
}, |
|
{ |
|
"epoch": 0.1773480068600214, |
|
"grad_norm": 1.4931174516677856, |
|
"learning_rate": 4.571999560773736e-05, |
|
"loss": 1.7197, |
|
"step": 7006 |
|
}, |
|
{ |
|
"epoch": 0.1781327325540923, |
|
"grad_norm": 1.809645414352417, |
|
"learning_rate": 4.5672886824039915e-05, |
|
"loss": 1.7409, |
|
"step": 7037 |
|
}, |
|
{ |
|
"epoch": 0.17891745824816319, |
|
"grad_norm": 1.544233798980713, |
|
"learning_rate": 4.5625544749624435e-05, |
|
"loss": 1.7331, |
|
"step": 7068 |
|
}, |
|
{ |
|
"epoch": 0.17970218394223406, |
|
"grad_norm": 1.5316941738128662, |
|
"learning_rate": 4.5577969918739794e-05, |
|
"loss": 1.7245, |
|
"step": 7099 |
|
}, |
|
{ |
|
"epoch": 0.18048690963630495, |
|
"grad_norm": 1.4646427631378174, |
|
"learning_rate": 4.5530162868261486e-05, |
|
"loss": 1.7341, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 0.18127163533037585, |
|
"grad_norm": 1.6266372203826904, |
|
"learning_rate": 4.548212413768558e-05, |
|
"loss": 1.7311, |
|
"step": 7161 |
|
}, |
|
{ |
|
"epoch": 0.18205636102444675, |
|
"grad_norm": 1.6372709274291992, |
|
"learning_rate": 4.543385426912261e-05, |
|
"loss": 1.7344, |
|
"step": 7192 |
|
}, |
|
{ |
|
"epoch": 0.18284108671851762, |
|
"grad_norm": 1.642005443572998, |
|
"learning_rate": 4.53853538072915e-05, |
|
"loss": 1.7472, |
|
"step": 7223 |
|
}, |
|
{ |
|
"epoch": 0.18362581241258852, |
|
"grad_norm": 1.7344322204589844, |
|
"learning_rate": 4.533662329951336e-05, |
|
"loss": 1.7379, |
|
"step": 7254 |
|
}, |
|
{ |
|
"epoch": 0.18441053810665942, |
|
"grad_norm": 1.6593672037124634, |
|
"learning_rate": 4.528766329570536e-05, |
|
"loss": 1.7363, |
|
"step": 7285 |
|
}, |
|
{ |
|
"epoch": 0.18519526380073031, |
|
"grad_norm": 1.590846300125122, |
|
"learning_rate": 4.523847434837447e-05, |
|
"loss": 1.7432, |
|
"step": 7316 |
|
}, |
|
{ |
|
"epoch": 0.18597998949480118, |
|
"grad_norm": 1.6701788902282715, |
|
"learning_rate": 4.518905701261128e-05, |
|
"loss": 1.7287, |
|
"step": 7347 |
|
}, |
|
{ |
|
"epoch": 0.18676471518887208, |
|
"grad_norm": 1.6129958629608154, |
|
"learning_rate": 4.5139411846083715e-05, |
|
"loss": 1.7252, |
|
"step": 7378 |
|
}, |
|
{ |
|
"epoch": 0.18754944088294298, |
|
"grad_norm": 1.5602383613586426, |
|
"learning_rate": 4.508953940903073e-05, |
|
"loss": 1.7365, |
|
"step": 7409 |
|
}, |
|
{ |
|
"epoch": 0.18833416657701388, |
|
"grad_norm": 1.60308039188385, |
|
"learning_rate": 4.5039440264255994e-05, |
|
"loss": 1.7361, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 0.18911889227108475, |
|
"grad_norm": 1.588299036026001, |
|
"learning_rate": 4.498911497712155e-05, |
|
"loss": 1.7574, |
|
"step": 7471 |
|
}, |
|
{ |
|
"epoch": 0.18990361796515565, |
|
"grad_norm": 1.5599571466445923, |
|
"learning_rate": 4.493856411554142e-05, |
|
"loss": 1.738, |
|
"step": 7502 |
|
}, |
|
{ |
|
"epoch": 0.19068834365922654, |
|
"grad_norm": 1.5749436616897583, |
|
"learning_rate": 4.4887788249975206e-05, |
|
"loss": 1.7272, |
|
"step": 7533 |
|
}, |
|
{ |
|
"epoch": 0.19147306935329744, |
|
"grad_norm": 1.5536047220230103, |
|
"learning_rate": 4.4836787953421656e-05, |
|
"loss": 1.7249, |
|
"step": 7564 |
|
}, |
|
{ |
|
"epoch": 0.1922577950473683, |
|
"grad_norm": 1.5227411985397339, |
|
"learning_rate": 4.478556380141218e-05, |
|
"loss": 1.7137, |
|
"step": 7595 |
|
}, |
|
{ |
|
"epoch": 0.1930425207414392, |
|
"grad_norm": 1.5771219730377197, |
|
"learning_rate": 4.4734116372004375e-05, |
|
"loss": 1.7094, |
|
"step": 7626 |
|
}, |
|
{ |
|
"epoch": 0.1938272464355101, |
|
"grad_norm": 1.4533522129058838, |
|
"learning_rate": 4.4682446245775477e-05, |
|
"loss": 1.7493, |
|
"step": 7657 |
|
}, |
|
{ |
|
"epoch": 0.194611972129581, |
|
"grad_norm": 1.5640264749526978, |
|
"learning_rate": 4.463055400581586e-05, |
|
"loss": 1.7228, |
|
"step": 7688 |
|
}, |
|
{ |
|
"epoch": 0.19539669782365188, |
|
"grad_norm": 1.4606215953826904, |
|
"learning_rate": 4.4578440237722374e-05, |
|
"loss": 1.7414, |
|
"step": 7719 |
|
}, |
|
{ |
|
"epoch": 0.19618142351772277, |
|
"grad_norm": 1.5216374397277832, |
|
"learning_rate": 4.452610552959183e-05, |
|
"loss": 1.7155, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.19696614921179367, |
|
"grad_norm": 1.683119535446167, |
|
"learning_rate": 4.447355047201428e-05, |
|
"loss": 1.7346, |
|
"step": 7781 |
|
}, |
|
{ |
|
"epoch": 0.19775087490586457, |
|
"grad_norm": 1.6055350303649902, |
|
"learning_rate": 4.4420775658066414e-05, |
|
"loss": 1.7112, |
|
"step": 7812 |
|
}, |
|
{ |
|
"epoch": 0.19853560059993544, |
|
"grad_norm": 1.514739751815796, |
|
"learning_rate": 4.436778168330484e-05, |
|
"loss": 1.7274, |
|
"step": 7843 |
|
}, |
|
{ |
|
"epoch": 0.19932032629400634, |
|
"grad_norm": 2.131218433380127, |
|
"learning_rate": 4.4314569145759353e-05, |
|
"loss": 1.7127, |
|
"step": 7874 |
|
}, |
|
{ |
|
"epoch": 0.20010505198807724, |
|
"grad_norm": 1.4867665767669678, |
|
"learning_rate": 4.42611386459262e-05, |
|
"loss": 1.7245, |
|
"step": 7905 |
|
}, |
|
{ |
|
"epoch": 0.20088977768214814, |
|
"grad_norm": 1.6395418643951416, |
|
"learning_rate": 4.420749078676133e-05, |
|
"loss": 1.7146, |
|
"step": 7936 |
|
}, |
|
{ |
|
"epoch": 0.201674503376219, |
|
"grad_norm": 1.629939079284668, |
|
"learning_rate": 4.4153626173673516e-05, |
|
"loss": 1.7153, |
|
"step": 7967 |
|
}, |
|
{ |
|
"epoch": 0.2024592290702899, |
|
"grad_norm": 1.5973584651947021, |
|
"learning_rate": 4.409954541451762e-05, |
|
"loss": 1.7102, |
|
"step": 7998 |
|
}, |
|
{ |
|
"epoch": 0.2032439547643608, |
|
"grad_norm": 1.4822708368301392, |
|
"learning_rate": 4.404524911958764e-05, |
|
"loss": 1.7046, |
|
"step": 8029 |
|
}, |
|
{ |
|
"epoch": 0.2040286804584317, |
|
"grad_norm": 1.4706634283065796, |
|
"learning_rate": 4.399073790160989e-05, |
|
"loss": 1.7022, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 0.20481340615250257, |
|
"grad_norm": 1.5917459726333618, |
|
"learning_rate": 4.393601237573607e-05, |
|
"loss": 1.6983, |
|
"step": 8091 |
|
}, |
|
{ |
|
"epoch": 0.20559813184657347, |
|
"grad_norm": 1.7328417301177979, |
|
"learning_rate": 4.388107315953628e-05, |
|
"loss": 1.7164, |
|
"step": 8122 |
|
}, |
|
{ |
|
"epoch": 0.20638285754064437, |
|
"grad_norm": 1.6152797937393188, |
|
"learning_rate": 4.382592087299212e-05, |
|
"loss": 1.7302, |
|
"step": 8153 |
|
}, |
|
{ |
|
"epoch": 0.20716758323471526, |
|
"grad_norm": 1.7153429985046387, |
|
"learning_rate": 4.377055613848964e-05, |
|
"loss": 1.7278, |
|
"step": 8184 |
|
}, |
|
{ |
|
"epoch": 0.20795230892878613, |
|
"grad_norm": 1.7167855501174927, |
|
"learning_rate": 4.3714979580812355e-05, |
|
"loss": 1.7021, |
|
"step": 8215 |
|
}, |
|
{ |
|
"epoch": 0.20873703462285703, |
|
"grad_norm": 1.458811640739441, |
|
"learning_rate": 4.365919182713416e-05, |
|
"loss": 1.7099, |
|
"step": 8246 |
|
}, |
|
{ |
|
"epoch": 0.20952176031692793, |
|
"grad_norm": 5.516291618347168, |
|
"learning_rate": 4.360319350701226e-05, |
|
"loss": 1.7069, |
|
"step": 8277 |
|
}, |
|
{ |
|
"epoch": 0.21030648601099883, |
|
"grad_norm": 1.5669766664505005, |
|
"learning_rate": 4.3546985252380115e-05, |
|
"loss": 1.6983, |
|
"step": 8308 |
|
}, |
|
{ |
|
"epoch": 0.2110912117050697, |
|
"grad_norm": 1.4598067998886108, |
|
"learning_rate": 4.349056769754021e-05, |
|
"loss": 1.7265, |
|
"step": 8339 |
|
}, |
|
{ |
|
"epoch": 0.2118759373991406, |
|
"grad_norm": 1.5436547994613647, |
|
"learning_rate": 4.3433941479156994e-05, |
|
"loss": 1.7128, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 0.2126606630932115, |
|
"grad_norm": 1.6275660991668701, |
|
"learning_rate": 4.3377107236249647e-05, |
|
"loss": 1.7229, |
|
"step": 8401 |
|
}, |
|
{ |
|
"epoch": 0.2134453887872824, |
|
"grad_norm": 1.6207513809204102, |
|
"learning_rate": 4.332006561018488e-05, |
|
"loss": 1.702, |
|
"step": 8432 |
|
}, |
|
{ |
|
"epoch": 0.21423011448135326, |
|
"grad_norm": 1.6795597076416016, |
|
"learning_rate": 4.3262817244669683e-05, |
|
"loss": 1.6808, |
|
"step": 8463 |
|
}, |
|
{ |
|
"epoch": 0.21501484017542416, |
|
"grad_norm": 1.660192608833313, |
|
"learning_rate": 4.3205362785744083e-05, |
|
"loss": 1.7071, |
|
"step": 8494 |
|
}, |
|
{ |
|
"epoch": 0.21579956586949506, |
|
"grad_norm": 1.6086353063583374, |
|
"learning_rate": 4.314770288177384e-05, |
|
"loss": 1.7083, |
|
"step": 8525 |
|
}, |
|
{ |
|
"epoch": 0.21658429156356596, |
|
"grad_norm": 1.475216269493103, |
|
"learning_rate": 4.308983818344313e-05, |
|
"loss": 1.7234, |
|
"step": 8556 |
|
}, |
|
{ |
|
"epoch": 0.21736901725763683, |
|
"grad_norm": 1.7111340761184692, |
|
"learning_rate": 4.3031769343747206e-05, |
|
"loss": 1.6872, |
|
"step": 8587 |
|
}, |
|
{ |
|
"epoch": 0.21815374295170772, |
|
"grad_norm": 1.4544799327850342, |
|
"learning_rate": 4.297349701798505e-05, |
|
"loss": 1.692, |
|
"step": 8618 |
|
}, |
|
{ |
|
"epoch": 0.21893846864577862, |
|
"grad_norm": 1.6593588590621948, |
|
"learning_rate": 4.2915021863751916e-05, |
|
"loss": 1.6886, |
|
"step": 8649 |
|
}, |
|
{ |
|
"epoch": 0.21972319433984952, |
|
"grad_norm": 1.641408085823059, |
|
"learning_rate": 4.285634454093198e-05, |
|
"loss": 1.6872, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 0.2205079200339204, |
|
"grad_norm": 1.6036972999572754, |
|
"learning_rate": 4.279746571169086e-05, |
|
"loss": 1.7055, |
|
"step": 8711 |
|
}, |
|
{ |
|
"epoch": 0.2212926457279913, |
|
"grad_norm": 1.4984327554702759, |
|
"learning_rate": 4.2738386040468136e-05, |
|
"loss": 1.6997, |
|
"step": 8742 |
|
}, |
|
{ |
|
"epoch": 0.2220773714220622, |
|
"grad_norm": 1.471111536026001, |
|
"learning_rate": 4.2679106193969866e-05, |
|
"loss": 1.6926, |
|
"step": 8773 |
|
}, |
|
{ |
|
"epoch": 0.22286209711613308, |
|
"grad_norm": 1.521364688873291, |
|
"learning_rate": 4.261962684116106e-05, |
|
"loss": 1.6851, |
|
"step": 8804 |
|
}, |
|
{ |
|
"epoch": 0.22364682281020395, |
|
"grad_norm": 1.6068321466445923, |
|
"learning_rate": 4.2559948653258145e-05, |
|
"loss": 1.7113, |
|
"step": 8835 |
|
}, |
|
{ |
|
"epoch": 0.22443154850427485, |
|
"grad_norm": 1.453379511833191, |
|
"learning_rate": 4.250007230372134e-05, |
|
"loss": 1.7025, |
|
"step": 8866 |
|
}, |
|
{ |
|
"epoch": 0.22521627419834575, |
|
"grad_norm": 1.5845959186553955, |
|
"learning_rate": 4.2439998468247126e-05, |
|
"loss": 1.6978, |
|
"step": 8897 |
|
}, |
|
{ |
|
"epoch": 0.22600099989241665, |
|
"grad_norm": 1.5308622121810913, |
|
"learning_rate": 4.2379727824760566e-05, |
|
"loss": 1.6956, |
|
"step": 8928 |
|
}, |
|
{ |
|
"epoch": 0.22678572558648752, |
|
"grad_norm": 1.6339962482452393, |
|
"learning_rate": 4.231926105340768e-05, |
|
"loss": 1.6831, |
|
"step": 8959 |
|
}, |
|
{ |
|
"epoch": 0.22757045128055842, |
|
"grad_norm": 1.4533487558364868, |
|
"learning_rate": 4.225859883654776e-05, |
|
"loss": 1.7025, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 0.22835517697462931, |
|
"grad_norm": 3.971897840499878, |
|
"learning_rate": 4.219774185874569e-05, |
|
"loss": 1.689, |
|
"step": 9021 |
|
}, |
|
{ |
|
"epoch": 0.2291399026687002, |
|
"grad_norm": 1.4394114017486572, |
|
"learning_rate": 4.213669080676418e-05, |
|
"loss": 1.6841, |
|
"step": 9052 |
|
}, |
|
{ |
|
"epoch": 0.22992462836277108, |
|
"grad_norm": 1.821142315864563, |
|
"learning_rate": 4.2075446369556056e-05, |
|
"loss": 1.6883, |
|
"step": 9083 |
|
}, |
|
{ |
|
"epoch": 0.23070935405684198, |
|
"grad_norm": 1.6653649806976318, |
|
"learning_rate": 4.201400923825648e-05, |
|
"loss": 1.7011, |
|
"step": 9114 |
|
}, |
|
{ |
|
"epoch": 0.23149407975091288, |
|
"grad_norm": 1.5895901918411255, |
|
"learning_rate": 4.195238010617511e-05, |
|
"loss": 1.7004, |
|
"step": 9145 |
|
}, |
|
{ |
|
"epoch": 0.23227880544498378, |
|
"grad_norm": 1.4648844003677368, |
|
"learning_rate": 4.1890559668788344e-05, |
|
"loss": 1.6872, |
|
"step": 9176 |
|
}, |
|
{ |
|
"epoch": 0.23306353113905465, |
|
"grad_norm": 1.5886753797531128, |
|
"learning_rate": 4.1828548623731405e-05, |
|
"loss": 1.6851, |
|
"step": 9207 |
|
}, |
|
{ |
|
"epoch": 0.23384825683312555, |
|
"grad_norm": 1.4713412523269653, |
|
"learning_rate": 4.1766347670790506e-05, |
|
"loss": 1.6818, |
|
"step": 9238 |
|
}, |
|
{ |
|
"epoch": 0.23463298252719644, |
|
"grad_norm": 1.5660710334777832, |
|
"learning_rate": 4.170395751189495e-05, |
|
"loss": 1.6844, |
|
"step": 9269 |
|
}, |
|
{ |
|
"epoch": 0.23541770822126734, |
|
"grad_norm": 1.7024312019348145, |
|
"learning_rate": 4.164137885110921e-05, |
|
"loss": 1.6839, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.2362024339153382, |
|
"grad_norm": 1.5936214923858643, |
|
"learning_rate": 4.157861239462495e-05, |
|
"loss": 1.6953, |
|
"step": 9331 |
|
}, |
|
{ |
|
"epoch": 0.2369871596094091, |
|
"grad_norm": 1.4709779024124146, |
|
"learning_rate": 4.1515658850753114e-05, |
|
"loss": 1.6806, |
|
"step": 9362 |
|
}, |
|
{ |
|
"epoch": 0.23777188530348, |
|
"grad_norm": 1.4303510189056396, |
|
"learning_rate": 4.145251892991588e-05, |
|
"loss": 1.6792, |
|
"step": 9393 |
|
}, |
|
{ |
|
"epoch": 0.2385566109975509, |
|
"grad_norm": 1.5452120304107666, |
|
"learning_rate": 4.138919334463868e-05, |
|
"loss": 1.6712, |
|
"step": 9424 |
|
}, |
|
{ |
|
"epoch": 0.23934133669162178, |
|
"grad_norm": 1.4944697618484497, |
|
"learning_rate": 4.1325682809542124e-05, |
|
"loss": 1.6777, |
|
"step": 9455 |
|
}, |
|
{ |
|
"epoch": 0.24012606238569267, |
|
"grad_norm": 1.6359312534332275, |
|
"learning_rate": 4.126198804133398e-05, |
|
"loss": 1.6782, |
|
"step": 9486 |
|
}, |
|
{ |
|
"epoch": 0.24091078807976357, |
|
"grad_norm": 1.3874454498291016, |
|
"learning_rate": 4.1198109758801055e-05, |
|
"loss": 1.6805, |
|
"step": 9517 |
|
}, |
|
{ |
|
"epoch": 0.24169551377383447, |
|
"grad_norm": 1.4747340679168701, |
|
"learning_rate": 4.113404868280107e-05, |
|
"loss": 1.6704, |
|
"step": 9548 |
|
}, |
|
{ |
|
"epoch": 0.24248023946790534, |
|
"grad_norm": 1.95576012134552, |
|
"learning_rate": 4.106980553625457e-05, |
|
"loss": 1.7008, |
|
"step": 9579 |
|
}, |
|
{ |
|
"epoch": 0.24326496516197624, |
|
"grad_norm": 1.454005479812622, |
|
"learning_rate": 4.100538104413674e-05, |
|
"loss": 1.6771, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 0.24404969085604714, |
|
"grad_norm": 1.5640463829040527, |
|
"learning_rate": 4.09407759334692e-05, |
|
"loss": 1.6763, |
|
"step": 9641 |
|
}, |
|
{ |
|
"epoch": 0.24483441655011803, |
|
"grad_norm": 1.5076780319213867, |
|
"learning_rate": 4.087599093331186e-05, |
|
"loss": 1.6977, |
|
"step": 9672 |
|
}, |
|
{ |
|
"epoch": 0.2456191422441889, |
|
"grad_norm": 1.5072520971298218, |
|
"learning_rate": 4.081102677475462e-05, |
|
"loss": 1.6749, |
|
"step": 9703 |
|
}, |
|
{ |
|
"epoch": 0.2464038679382598, |
|
"grad_norm": 1.6311815977096558, |
|
"learning_rate": 4.0745884190909194e-05, |
|
"loss": 1.684, |
|
"step": 9734 |
|
}, |
|
{ |
|
"epoch": 0.2471885936323307, |
|
"grad_norm": 1.5691202878952026, |
|
"learning_rate": 4.0680563916900796e-05, |
|
"loss": 1.6804, |
|
"step": 9765 |
|
}, |
|
{ |
|
"epoch": 0.2479733193264016, |
|
"grad_norm": 1.4325530529022217, |
|
"learning_rate": 4.0615066689859815e-05, |
|
"loss": 1.719, |
|
"step": 9796 |
|
}, |
|
{ |
|
"epoch": 0.24875804502047247, |
|
"grad_norm": 1.439177393913269, |
|
"learning_rate": 4.0549393248913584e-05, |
|
"loss": 1.6873, |
|
"step": 9827 |
|
}, |
|
{ |
|
"epoch": 0.24954277071454337, |
|
"grad_norm": 1.4155471324920654, |
|
"learning_rate": 4.048354433517794e-05, |
|
"loss": 1.692, |
|
"step": 9858 |
|
}, |
|
{ |
|
"epoch": 0.25032749640861424, |
|
"grad_norm": 1.5917115211486816, |
|
"learning_rate": 4.0417520691748916e-05, |
|
"loss": 1.6752, |
|
"step": 9889 |
|
}, |
|
{ |
|
"epoch": 0.25111222210268513, |
|
"grad_norm": 1.649154543876648, |
|
"learning_rate": 4.035132306369438e-05, |
|
"loss": 1.6603, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 0.25189694779675603, |
|
"grad_norm": 1.5114792585372925, |
|
"learning_rate": 4.028495219804555e-05, |
|
"loss": 1.7005, |
|
"step": 9951 |
|
}, |
|
{ |
|
"epoch": 0.25268167349082693, |
|
"grad_norm": 16.910812377929688, |
|
"learning_rate": 4.021840884378864e-05, |
|
"loss": 1.6846, |
|
"step": 9982 |
|
}, |
|
{ |
|
"epoch": 0.25346639918489783, |
|
"grad_norm": 1.4342628717422485, |
|
"learning_rate": 4.015169375185633e-05, |
|
"loss": 1.6678, |
|
"step": 10013 |
|
}, |
|
{ |
|
"epoch": 0.2542511248789687, |
|
"grad_norm": 1.4815376996994019, |
|
"learning_rate": 4.0084807675119396e-05, |
|
"loss": 1.671, |
|
"step": 10044 |
|
}, |
|
{ |
|
"epoch": 0.2550358505730396, |
|
"grad_norm": 1.4633368253707886, |
|
"learning_rate": 4.0017751368378106e-05, |
|
"loss": 1.6824, |
|
"step": 10075 |
|
}, |
|
{ |
|
"epoch": 0.2558205762671105, |
|
"grad_norm": 1.3904149532318115, |
|
"learning_rate": 3.995052558835377e-05, |
|
"loss": 1.6775, |
|
"step": 10106 |
|
}, |
|
{ |
|
"epoch": 0.25660530196118136, |
|
"grad_norm": 1.5234646797180176, |
|
"learning_rate": 3.988313109368017e-05, |
|
"loss": 1.6854, |
|
"step": 10137 |
|
}, |
|
{ |
|
"epoch": 0.25739002765525226, |
|
"grad_norm": 1.4530494213104248, |
|
"learning_rate": 3.981556864489504e-05, |
|
"loss": 1.6727, |
|
"step": 10168 |
|
}, |
|
{ |
|
"epoch": 0.25817475334932316, |
|
"grad_norm": 1.5600273609161377, |
|
"learning_rate": 3.974783900443142e-05, |
|
"loss": 1.6645, |
|
"step": 10199 |
|
}, |
|
{ |
|
"epoch": 0.25895947904339406, |
|
"grad_norm": 1.4213160276412964, |
|
"learning_rate": 3.9679942936609095e-05, |
|
"loss": 1.6898, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 0.25974420473746496, |
|
"grad_norm": 1.5741041898727417, |
|
"learning_rate": 3.961188120762596e-05, |
|
"loss": 1.693, |
|
"step": 10261 |
|
}, |
|
{ |
|
"epoch": 0.26052893043153585, |
|
"grad_norm": 1.564493179321289, |
|
"learning_rate": 3.954365458554938e-05, |
|
"loss": 1.6836, |
|
"step": 10292 |
|
}, |
|
{ |
|
"epoch": 0.26131365612560675, |
|
"grad_norm": 1.5584787130355835, |
|
"learning_rate": 3.947526384030751e-05, |
|
"loss": 1.6852, |
|
"step": 10323 |
|
}, |
|
{ |
|
"epoch": 0.26209838181967765, |
|
"grad_norm": 1.4936350584030151, |
|
"learning_rate": 3.9406709743680624e-05, |
|
"loss": 1.6777, |
|
"step": 10354 |
|
}, |
|
{ |
|
"epoch": 0.26288310751374855, |
|
"grad_norm": 1.504725694656372, |
|
"learning_rate": 3.9337993069292366e-05, |
|
"loss": 1.6765, |
|
"step": 10385 |
|
}, |
|
{ |
|
"epoch": 0.2636678332078194, |
|
"grad_norm": 1.4809914827346802, |
|
"learning_rate": 3.926911459260109e-05, |
|
"loss": 1.6578, |
|
"step": 10416 |
|
}, |
|
{ |
|
"epoch": 0.2644525589018903, |
|
"grad_norm": 1.529976725578308, |
|
"learning_rate": 3.920007509089102e-05, |
|
"loss": 1.6709, |
|
"step": 10447 |
|
}, |
|
{ |
|
"epoch": 0.2652372845959612, |
|
"grad_norm": 1.483694076538086, |
|
"learning_rate": 3.913087534326357e-05, |
|
"loss": 1.6713, |
|
"step": 10478 |
|
}, |
|
{ |
|
"epoch": 0.2660220102900321, |
|
"grad_norm": 1.4282972812652588, |
|
"learning_rate": 3.9061516130628475e-05, |
|
"loss": 1.6784, |
|
"step": 10509 |
|
}, |
|
{ |
|
"epoch": 0.266806735984103, |
|
"grad_norm": 1.5122032165527344, |
|
"learning_rate": 3.8991998235695025e-05, |
|
"loss": 1.6603, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 0.2675914616781739, |
|
"grad_norm": 1.5154742002487183, |
|
"learning_rate": 3.8922322442963224e-05, |
|
"loss": 1.6831, |
|
"step": 10571 |
|
}, |
|
{ |
|
"epoch": 0.2683761873722448, |
|
"grad_norm": 1.4630860090255737, |
|
"learning_rate": 3.885248953871491e-05, |
|
"loss": 1.6715, |
|
"step": 10602 |
|
}, |
|
{ |
|
"epoch": 0.2691609130663157, |
|
"grad_norm": 1.4164702892303467, |
|
"learning_rate": 3.8782500311004915e-05, |
|
"loss": 1.6654, |
|
"step": 10633 |
|
}, |
|
{ |
|
"epoch": 0.2699456387603865, |
|
"grad_norm": 1.5865578651428223, |
|
"learning_rate": 3.871235554965218e-05, |
|
"loss": 1.6829, |
|
"step": 10664 |
|
}, |
|
{ |
|
"epoch": 0.2707303644544574, |
|
"grad_norm": 1.4984766244888306, |
|
"learning_rate": 3.864205604623078e-05, |
|
"loss": 1.673, |
|
"step": 10695 |
|
}, |
|
{ |
|
"epoch": 0.2715150901485283, |
|
"grad_norm": 1.5477566719055176, |
|
"learning_rate": 3.857160259406107e-05, |
|
"loss": 1.6711, |
|
"step": 10726 |
|
}, |
|
{ |
|
"epoch": 0.2722998158425992, |
|
"grad_norm": 1.5356842279434204, |
|
"learning_rate": 3.8500995988200674e-05, |
|
"loss": 1.6556, |
|
"step": 10757 |
|
}, |
|
{ |
|
"epoch": 0.2730845415366701, |
|
"grad_norm": 1.413104772567749, |
|
"learning_rate": 3.843023702543556e-05, |
|
"loss": 1.658, |
|
"step": 10788 |
|
}, |
|
{ |
|
"epoch": 0.273869267230741, |
|
"grad_norm": 1.5174081325531006, |
|
"learning_rate": 3.8359326504270984e-05, |
|
"loss": 1.6672, |
|
"step": 10819 |
|
}, |
|
{ |
|
"epoch": 0.2746539929248119, |
|
"grad_norm": 1.4649910926818848, |
|
"learning_rate": 3.828826522492255e-05, |
|
"loss": 1.6625, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 0.2754387186188828, |
|
"grad_norm": 1.5240408182144165, |
|
"learning_rate": 3.821705398930713e-05, |
|
"loss": 1.6619, |
|
"step": 10881 |
|
}, |
|
{ |
|
"epoch": 0.27622344431295365, |
|
"grad_norm": 1.4349104166030884, |
|
"learning_rate": 3.814569360103385e-05, |
|
"loss": 1.6595, |
|
"step": 10912 |
|
}, |
|
{ |
|
"epoch": 0.27700817000702455, |
|
"grad_norm": 1.4311225414276123, |
|
"learning_rate": 3.807418486539499e-05, |
|
"loss": 1.6557, |
|
"step": 10943 |
|
}, |
|
{ |
|
"epoch": 0.27779289570109544, |
|
"grad_norm": 1.5817755460739136, |
|
"learning_rate": 3.80025285893569e-05, |
|
"loss": 1.6882, |
|
"step": 10974 |
|
}, |
|
{ |
|
"epoch": 0.27857762139516634, |
|
"grad_norm": 1.5182181596755981, |
|
"learning_rate": 3.793072558155093e-05, |
|
"loss": 1.6697, |
|
"step": 11005 |
|
}, |
|
{ |
|
"epoch": 0.27936234708923724, |
|
"grad_norm": 1.4836517572402954, |
|
"learning_rate": 3.785877665226426e-05, |
|
"loss": 1.6576, |
|
"step": 11036 |
|
}, |
|
{ |
|
"epoch": 0.28014707278330814, |
|
"grad_norm": 1.460788607597351, |
|
"learning_rate": 3.778668261343079e-05, |
|
"loss": 1.6607, |
|
"step": 11067 |
|
}, |
|
{ |
|
"epoch": 0.28093179847737904, |
|
"grad_norm": 1.4307125806808472, |
|
"learning_rate": 3.771444427862192e-05, |
|
"loss": 1.662, |
|
"step": 11098 |
|
}, |
|
{ |
|
"epoch": 0.28171652417144993, |
|
"grad_norm": 1.4999738931655884, |
|
"learning_rate": 3.7642062463037465e-05, |
|
"loss": 1.6406, |
|
"step": 11129 |
|
}, |
|
{ |
|
"epoch": 0.2825012498655208, |
|
"grad_norm": 1.4646129608154297, |
|
"learning_rate": 3.7569537983496373e-05, |
|
"loss": 1.6653, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 0.2832859755595917, |
|
"grad_norm": 1.4709292650222778, |
|
"learning_rate": 3.749687165842753e-05, |
|
"loss": 1.6704, |
|
"step": 11191 |
|
}, |
|
{ |
|
"epoch": 0.28407070125366257, |
|
"grad_norm": 1.494458556175232, |
|
"learning_rate": 3.7424064307860536e-05, |
|
"loss": 1.6534, |
|
"step": 11222 |
|
}, |
|
{ |
|
"epoch": 0.28485542694773347, |
|
"grad_norm": 1.4409736394882202, |
|
"learning_rate": 3.735111675341645e-05, |
|
"loss": 1.6645, |
|
"step": 11253 |
|
}, |
|
{ |
|
"epoch": 0.28564015264180437, |
|
"grad_norm": 1.4628338813781738, |
|
"learning_rate": 3.7278029818298524e-05, |
|
"loss": 1.6611, |
|
"step": 11284 |
|
}, |
|
{ |
|
"epoch": 0.28642487833587527, |
|
"grad_norm": 1.3659113645553589, |
|
"learning_rate": 3.720480432728287e-05, |
|
"loss": 1.6435, |
|
"step": 11315 |
|
}, |
|
{ |
|
"epoch": 0.28720960402994616, |
|
"grad_norm": 1.3704752922058105, |
|
"learning_rate": 3.71314411067092e-05, |
|
"loss": 1.6507, |
|
"step": 11346 |
|
}, |
|
{ |
|
"epoch": 0.28799432972401706, |
|
"grad_norm": 1.579837441444397, |
|
"learning_rate": 3.70579409844715e-05, |
|
"loss": 1.6716, |
|
"step": 11377 |
|
}, |
|
{ |
|
"epoch": 0.2887790554180879, |
|
"grad_norm": 1.5566996335983276, |
|
"learning_rate": 3.698430479000865e-05, |
|
"loss": 1.6439, |
|
"step": 11408 |
|
}, |
|
{ |
|
"epoch": 0.2895637811121588, |
|
"grad_norm": 1.4722687005996704, |
|
"learning_rate": 3.691053335429509e-05, |
|
"loss": 1.683, |
|
"step": 11439 |
|
}, |
|
{ |
|
"epoch": 0.2903485068062297, |
|
"grad_norm": 1.491283893585205, |
|
"learning_rate": 3.683662750983147e-05, |
|
"loss": 1.6606, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 0.2911332325003006, |
|
"grad_norm": 1.402040719985962, |
|
"learning_rate": 3.676258809063518e-05, |
|
"loss": 1.6582, |
|
"step": 11501 |
|
}, |
|
{ |
|
"epoch": 0.2919179581943715, |
|
"grad_norm": 1.4377038478851318, |
|
"learning_rate": 3.6688415932231004e-05, |
|
"loss": 1.6398, |
|
"step": 11532 |
|
}, |
|
{ |
|
"epoch": 0.2927026838884424, |
|
"grad_norm": 1.4151259660720825, |
|
"learning_rate": 3.661411187164166e-05, |
|
"loss": 1.6645, |
|
"step": 11563 |
|
}, |
|
{ |
|
"epoch": 0.2934874095825133, |
|
"grad_norm": 1.5219615697860718, |
|
"learning_rate": 3.65396767473784e-05, |
|
"loss": 1.6705, |
|
"step": 11594 |
|
}, |
|
{ |
|
"epoch": 0.2942721352765842, |
|
"grad_norm": 1.533252239227295, |
|
"learning_rate": 3.6465111399431465e-05, |
|
"loss": 1.6714, |
|
"step": 11625 |
|
}, |
|
{ |
|
"epoch": 0.29505686097065503, |
|
"grad_norm": 1.410959243774414, |
|
"learning_rate": 3.6390416669260674e-05, |
|
"loss": 1.6533, |
|
"step": 11656 |
|
}, |
|
{ |
|
"epoch": 0.29584158666472593, |
|
"grad_norm": 1.5377541780471802, |
|
"learning_rate": 3.63155933997859e-05, |
|
"loss": 1.6505, |
|
"step": 11687 |
|
}, |
|
{ |
|
"epoch": 0.29662631235879683, |
|
"grad_norm": 1.4504135847091675, |
|
"learning_rate": 3.624064243537758e-05, |
|
"loss": 1.6287, |
|
"step": 11718 |
|
}, |
|
{ |
|
"epoch": 0.2974110380528677, |
|
"grad_norm": 1.4606986045837402, |
|
"learning_rate": 3.616556462184716e-05, |
|
"loss": 1.6592, |
|
"step": 11749 |
|
}, |
|
{ |
|
"epoch": 0.2981957637469386, |
|
"grad_norm": 1.4440289735794067, |
|
"learning_rate": 3.609036080643755e-05, |
|
"loss": 1.6598, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 0.2989804894410095, |
|
"grad_norm": 1.5399249792099, |
|
"learning_rate": 3.60150318378136e-05, |
|
"loss": 1.6852, |
|
"step": 11811 |
|
}, |
|
{ |
|
"epoch": 0.2997652151350804, |
|
"grad_norm": 1.4778543710708618, |
|
"learning_rate": 3.5939578566052465e-05, |
|
"loss": 1.6462, |
|
"step": 11842 |
|
}, |
|
{ |
|
"epoch": 0.3005499408291513, |
|
"grad_norm": 1.4979726076126099, |
|
"learning_rate": 3.586400184263408e-05, |
|
"loss": 1.6576, |
|
"step": 11873 |
|
}, |
|
{ |
|
"epoch": 0.30133466652322216, |
|
"grad_norm": 1.4904232025146484, |
|
"learning_rate": 3.578830252043148e-05, |
|
"loss": 1.6476, |
|
"step": 11904 |
|
}, |
|
{ |
|
"epoch": 0.30211939221729306, |
|
"grad_norm": 1.5472886562347412, |
|
"learning_rate": 3.571248145370125e-05, |
|
"loss": 1.6721, |
|
"step": 11935 |
|
}, |
|
{ |
|
"epoch": 0.30290411791136396, |
|
"grad_norm": 1.4954209327697754, |
|
"learning_rate": 3.5636539498073794e-05, |
|
"loss": 1.6483, |
|
"step": 11966 |
|
}, |
|
{ |
|
"epoch": 0.30368884360543486, |
|
"grad_norm": 1.4504363536834717, |
|
"learning_rate": 3.556047751054378e-05, |
|
"loss": 1.657, |
|
"step": 11997 |
|
}, |
|
{ |
|
"epoch": 0.30447356929950575, |
|
"grad_norm": 1.3581033945083618, |
|
"learning_rate": 3.548429634946039e-05, |
|
"loss": 1.6579, |
|
"step": 12028 |
|
}, |
|
{ |
|
"epoch": 0.30525829499357665, |
|
"grad_norm": 1.4421014785766602, |
|
"learning_rate": 3.540799687451768e-05, |
|
"loss": 1.6496, |
|
"step": 12059 |
|
}, |
|
{ |
|
"epoch": 0.30604302068764755, |
|
"grad_norm": 1.523169994354248, |
|
"learning_rate": 3.533157994674485e-05, |
|
"loss": 1.6714, |
|
"step": 12090 |
|
}, |
|
{ |
|
"epoch": 0.30682774638171845, |
|
"grad_norm": 1.455269455909729, |
|
"learning_rate": 3.5255046428496546e-05, |
|
"loss": 1.6695, |
|
"step": 12121 |
|
}, |
|
{ |
|
"epoch": 0.3076124720757893, |
|
"grad_norm": 1.4330891370773315, |
|
"learning_rate": 3.517839718344311e-05, |
|
"loss": 1.6519, |
|
"step": 12152 |
|
}, |
|
{ |
|
"epoch": 0.3083971977698602, |
|
"grad_norm": 1.3913158178329468, |
|
"learning_rate": 3.510163307656086e-05, |
|
"loss": 1.6329, |
|
"step": 12183 |
|
}, |
|
{ |
|
"epoch": 0.3091819234639311, |
|
"grad_norm": 1.355193018913269, |
|
"learning_rate": 3.5024754974122324e-05, |
|
"loss": 1.624, |
|
"step": 12214 |
|
}, |
|
{ |
|
"epoch": 0.309966649158002, |
|
"grad_norm": 1.4055231809616089, |
|
"learning_rate": 3.494776374368643e-05, |
|
"loss": 1.6491, |
|
"step": 12245 |
|
}, |
|
{ |
|
"epoch": 0.3107513748520729, |
|
"grad_norm": 1.4227032661437988, |
|
"learning_rate": 3.4870660254088724e-05, |
|
"loss": 1.6274, |
|
"step": 12276 |
|
}, |
|
{ |
|
"epoch": 0.3115361005461438, |
|
"grad_norm": 1.4558427333831787, |
|
"learning_rate": 3.479344537543164e-05, |
|
"loss": 1.6419, |
|
"step": 12307 |
|
}, |
|
{ |
|
"epoch": 0.3123208262402147, |
|
"grad_norm": 1.5154629945755005, |
|
"learning_rate": 3.4716119979074565e-05, |
|
"loss": 1.6443, |
|
"step": 12338 |
|
}, |
|
{ |
|
"epoch": 0.3131055519342856, |
|
"grad_norm": 1.4458774328231812, |
|
"learning_rate": 3.463868493762412e-05, |
|
"loss": 1.6615, |
|
"step": 12369 |
|
}, |
|
{ |
|
"epoch": 0.3138902776283564, |
|
"grad_norm": 1.4116544723510742, |
|
"learning_rate": 3.456114112492418e-05, |
|
"loss": 1.6481, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.3146750033224273, |
|
"grad_norm": 1.8497071266174316, |
|
"learning_rate": 3.4483489416046164e-05, |
|
"loss": 1.6262, |
|
"step": 12431 |
|
}, |
|
{ |
|
"epoch": 0.3154597290164982, |
|
"grad_norm": 1.3854331970214844, |
|
"learning_rate": 3.440573068727905e-05, |
|
"loss": 1.6387, |
|
"step": 12462 |
|
}, |
|
{ |
|
"epoch": 0.3162444547105691, |
|
"grad_norm": 1.509178876876831, |
|
"learning_rate": 3.4327865816119495e-05, |
|
"loss": 1.6566, |
|
"step": 12493 |
|
}, |
|
{ |
|
"epoch": 0.31702918040464, |
|
"grad_norm": 1.3977612257003784, |
|
"learning_rate": 3.4249895681262025e-05, |
|
"loss": 1.6676, |
|
"step": 12524 |
|
}, |
|
{ |
|
"epoch": 0.3178139060987109, |
|
"grad_norm": 1.3736423254013062, |
|
"learning_rate": 3.417182116258899e-05, |
|
"loss": 1.6238, |
|
"step": 12555 |
|
}, |
|
{ |
|
"epoch": 0.3185986317927818, |
|
"grad_norm": 1.4226630926132202, |
|
"learning_rate": 3.409364314116074e-05, |
|
"loss": 1.6513, |
|
"step": 12586 |
|
}, |
|
{ |
|
"epoch": 0.3193833574868527, |
|
"grad_norm": 1.4804571866989136, |
|
"learning_rate": 3.401536249920559e-05, |
|
"loss": 1.6383, |
|
"step": 12617 |
|
}, |
|
{ |
|
"epoch": 0.32016808318092355, |
|
"grad_norm": 1.456168532371521, |
|
"learning_rate": 3.393698012010998e-05, |
|
"loss": 1.6621, |
|
"step": 12648 |
|
}, |
|
{ |
|
"epoch": 0.32095280887499444, |
|
"grad_norm": 1.3990952968597412, |
|
"learning_rate": 3.385849688840839e-05, |
|
"loss": 1.6376, |
|
"step": 12679 |
|
}, |
|
{ |
|
"epoch": 0.32173753456906534, |
|
"grad_norm": 1.3588812351226807, |
|
"learning_rate": 3.3779913689773414e-05, |
|
"loss": 1.656, |
|
"step": 12710 |
|
}, |
|
{ |
|
"epoch": 0.32252226026313624, |
|
"grad_norm": 1.4718931913375854, |
|
"learning_rate": 3.370123141100578e-05, |
|
"loss": 1.6255, |
|
"step": 12741 |
|
}, |
|
{ |
|
"epoch": 0.32330698595720714, |
|
"grad_norm": 1.3603503704071045, |
|
"learning_rate": 3.3622450940024305e-05, |
|
"loss": 1.6517, |
|
"step": 12772 |
|
}, |
|
{ |
|
"epoch": 0.32409171165127804, |
|
"grad_norm": 1.4493441581726074, |
|
"learning_rate": 3.35435731658559e-05, |
|
"loss": 1.643, |
|
"step": 12803 |
|
}, |
|
{ |
|
"epoch": 0.32487643734534893, |
|
"grad_norm": 1.3813337087631226, |
|
"learning_rate": 3.346459897862552e-05, |
|
"loss": 1.6449, |
|
"step": 12834 |
|
}, |
|
{ |
|
"epoch": 0.32566116303941983, |
|
"grad_norm": 1.5027899742126465, |
|
"learning_rate": 3.338552926954613e-05, |
|
"loss": 1.6497, |
|
"step": 12865 |
|
}, |
|
{ |
|
"epoch": 0.3264458887334907, |
|
"grad_norm": 1.3805309534072876, |
|
"learning_rate": 3.330636493090868e-05, |
|
"loss": 1.6449, |
|
"step": 12896 |
|
}, |
|
{ |
|
"epoch": 0.3272306144275616, |
|
"grad_norm": 1.642248511314392, |
|
"learning_rate": 3.322710685607193e-05, |
|
"loss": 1.6261, |
|
"step": 12927 |
|
}, |
|
{ |
|
"epoch": 0.32801534012163247, |
|
"grad_norm": 1.4579522609710693, |
|
"learning_rate": 3.314775593945251e-05, |
|
"loss": 1.6648, |
|
"step": 12958 |
|
}, |
|
{ |
|
"epoch": 0.32880006581570337, |
|
"grad_norm": 1.3579092025756836, |
|
"learning_rate": 3.3068313076514714e-05, |
|
"loss": 1.6468, |
|
"step": 12989 |
|
}, |
|
{ |
|
"epoch": 0.32958479150977427, |
|
"grad_norm": 1.406051754951477, |
|
"learning_rate": 3.298877916376047e-05, |
|
"loss": 1.6249, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 0.33036951720384516, |
|
"grad_norm": 1.457335114479065, |
|
"learning_rate": 3.290915509871915e-05, |
|
"loss": 1.6353, |
|
"step": 13051 |
|
}, |
|
{ |
|
"epoch": 0.33115424289791606, |
|
"grad_norm": 1.4548041820526123, |
|
"learning_rate": 3.282944177993753e-05, |
|
"loss": 1.6272, |
|
"step": 13082 |
|
}, |
|
{ |
|
"epoch": 0.33193896859198696, |
|
"grad_norm": 1.4140032529830933, |
|
"learning_rate": 3.274964010696957e-05, |
|
"loss": 1.6479, |
|
"step": 13113 |
|
}, |
|
{ |
|
"epoch": 0.3327236942860578, |
|
"grad_norm": 1.3436623811721802, |
|
"learning_rate": 3.266975098036629e-05, |
|
"loss": 1.6452, |
|
"step": 13144 |
|
}, |
|
{ |
|
"epoch": 0.3335084199801287, |
|
"grad_norm": 1.4224274158477783, |
|
"learning_rate": 3.258977530166562e-05, |
|
"loss": 1.6242, |
|
"step": 13175 |
|
}, |
|
{ |
|
"epoch": 0.3342931456741996, |
|
"grad_norm": 1.5661940574645996, |
|
"learning_rate": 3.250971397338227e-05, |
|
"loss": 1.6404, |
|
"step": 13206 |
|
}, |
|
{ |
|
"epoch": 0.3350778713682705, |
|
"grad_norm": 1.4696576595306396, |
|
"learning_rate": 3.2429567898997404e-05, |
|
"loss": 1.6436, |
|
"step": 13237 |
|
}, |
|
{ |
|
"epoch": 0.3358625970623414, |
|
"grad_norm": 1.4438591003417969, |
|
"learning_rate": 3.234933798294859e-05, |
|
"loss": 1.6404, |
|
"step": 13268 |
|
}, |
|
{ |
|
"epoch": 0.3366473227564123, |
|
"grad_norm": 1.4548406600952148, |
|
"learning_rate": 3.2269025130619535e-05, |
|
"loss": 1.6461, |
|
"step": 13299 |
|
}, |
|
{ |
|
"epoch": 0.3374320484504832, |
|
"grad_norm": 1.4180691242218018, |
|
"learning_rate": 3.218863024832985e-05, |
|
"loss": 1.6377, |
|
"step": 13330 |
|
}, |
|
{ |
|
"epoch": 0.3382167741445541, |
|
"grad_norm": 1.4060105085372925, |
|
"learning_rate": 3.2108154243324864e-05, |
|
"loss": 1.6045, |
|
"step": 13361 |
|
}, |
|
{ |
|
"epoch": 0.33900149983862493, |
|
"grad_norm": 1.4134920835494995, |
|
"learning_rate": 3.2027598023765345e-05, |
|
"loss": 1.6264, |
|
"step": 13392 |
|
}, |
|
{ |
|
"epoch": 0.33978622553269583, |
|
"grad_norm": 1.4582122564315796, |
|
"learning_rate": 3.194696249871729e-05, |
|
"loss": 1.623, |
|
"step": 13423 |
|
}, |
|
{ |
|
"epoch": 0.3405709512267667, |
|
"grad_norm": 1.4027389287948608, |
|
"learning_rate": 3.186624857814164e-05, |
|
"loss": 1.6337, |
|
"step": 13454 |
|
}, |
|
{ |
|
"epoch": 0.3413556769208376, |
|
"grad_norm": 1.3397070169448853, |
|
"learning_rate": 3.178545717288401e-05, |
|
"loss": 1.6334, |
|
"step": 13485 |
|
}, |
|
{ |
|
"epoch": 0.3421404026149085, |
|
"grad_norm": 1.5358332395553589, |
|
"learning_rate": 3.170458919466444e-05, |
|
"loss": 1.6393, |
|
"step": 13516 |
|
}, |
|
{ |
|
"epoch": 0.3429251283089794, |
|
"grad_norm": 1.5479260683059692, |
|
"learning_rate": 3.1623645556067063e-05, |
|
"loss": 1.6357, |
|
"step": 13547 |
|
}, |
|
{ |
|
"epoch": 0.3437098540030503, |
|
"grad_norm": 1.3949965238571167, |
|
"learning_rate": 3.154262717052985e-05, |
|
"loss": 1.6325, |
|
"step": 13578 |
|
}, |
|
{ |
|
"epoch": 0.3444945796971212, |
|
"grad_norm": 1.392903208732605, |
|
"learning_rate": 3.146153495233426e-05, |
|
"loss": 1.6071, |
|
"step": 13609 |
|
}, |
|
{ |
|
"epoch": 0.34527930539119206, |
|
"grad_norm": 1.4290788173675537, |
|
"learning_rate": 3.1380369816594944e-05, |
|
"loss": 1.6266, |
|
"step": 13640 |
|
}, |
|
{ |
|
"epoch": 0.34606403108526296, |
|
"grad_norm": 1.4005228281021118, |
|
"learning_rate": 3.129913267924946e-05, |
|
"loss": 1.6391, |
|
"step": 13671 |
|
}, |
|
{ |
|
"epoch": 0.34684875677933386, |
|
"grad_norm": 1.378369927406311, |
|
"learning_rate": 3.121782445704782e-05, |
|
"loss": 1.6495, |
|
"step": 13702 |
|
}, |
|
{ |
|
"epoch": 0.34763348247340475, |
|
"grad_norm": 1.4202784299850464, |
|
"learning_rate": 3.11364460675423e-05, |
|
"loss": 1.637, |
|
"step": 13733 |
|
}, |
|
{ |
|
"epoch": 0.34841820816747565, |
|
"grad_norm": 1.3670291900634766, |
|
"learning_rate": 3.1054998429076934e-05, |
|
"loss": 1.5941, |
|
"step": 13764 |
|
}, |
|
{ |
|
"epoch": 0.34920293386154655, |
|
"grad_norm": 1.3714202642440796, |
|
"learning_rate": 3.097348246077728e-05, |
|
"loss": 1.6096, |
|
"step": 13795 |
|
}, |
|
{ |
|
"epoch": 0.34998765955561745, |
|
"grad_norm": 1.4889552593231201, |
|
"learning_rate": 3.0891899082539924e-05, |
|
"loss": 1.6245, |
|
"step": 13826 |
|
}, |
|
{ |
|
"epoch": 0.35077238524968835, |
|
"grad_norm": 1.4640086889266968, |
|
"learning_rate": 3.0810249215022233e-05, |
|
"loss": 1.6197, |
|
"step": 13857 |
|
}, |
|
{ |
|
"epoch": 0.35155711094375924, |
|
"grad_norm": 1.385380506515503, |
|
"learning_rate": 3.0728533779631865e-05, |
|
"loss": 1.61, |
|
"step": 13888 |
|
}, |
|
{ |
|
"epoch": 0.3523418366378301, |
|
"grad_norm": 1.3958945274353027, |
|
"learning_rate": 3.064675369851637e-05, |
|
"loss": 1.6139, |
|
"step": 13919 |
|
}, |
|
{ |
|
"epoch": 0.353126562331901, |
|
"grad_norm": 1.3746731281280518, |
|
"learning_rate": 3.056490989455289e-05, |
|
"loss": 1.6307, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 0.3539112880259719, |
|
"grad_norm": 1.4196429252624512, |
|
"learning_rate": 3.0483003291337596e-05, |
|
"loss": 1.6192, |
|
"step": 13981 |
|
}, |
|
{ |
|
"epoch": 0.3546960137200428, |
|
"grad_norm": 1.3648637533187866, |
|
"learning_rate": 3.040103481317539e-05, |
|
"loss": 1.6124, |
|
"step": 14012 |
|
}, |
|
{ |
|
"epoch": 0.3554807394141137, |
|
"grad_norm": 1.422004222869873, |
|
"learning_rate": 3.03190053850694e-05, |
|
"loss": 1.6288, |
|
"step": 14043 |
|
}, |
|
{ |
|
"epoch": 0.3562654651081846, |
|
"grad_norm": 1.4687801599502563, |
|
"learning_rate": 3.0236915932710573e-05, |
|
"loss": 1.6118, |
|
"step": 14074 |
|
}, |
|
{ |
|
"epoch": 0.3570501908022555, |
|
"grad_norm": 1.30635404586792, |
|
"learning_rate": 3.0154767382467232e-05, |
|
"loss": 1.6341, |
|
"step": 14105 |
|
}, |
|
{ |
|
"epoch": 0.35783491649632637, |
|
"grad_norm": 1.4216945171356201, |
|
"learning_rate": 3.0072560661374582e-05, |
|
"loss": 1.6385, |
|
"step": 14136 |
|
}, |
|
{ |
|
"epoch": 0.3586196421903972, |
|
"grad_norm": 1.4296518564224243, |
|
"learning_rate": 2.999029669712431e-05, |
|
"loss": 1.6262, |
|
"step": 14167 |
|
}, |
|
{ |
|
"epoch": 0.3594043678844681, |
|
"grad_norm": 1.4529691934585571, |
|
"learning_rate": 2.990797641805408e-05, |
|
"loss": 1.6136, |
|
"step": 14198 |
|
}, |
|
{ |
|
"epoch": 0.360189093578539, |
|
"grad_norm": 1.389478325843811, |
|
"learning_rate": 2.982560075313704e-05, |
|
"loss": 1.6263, |
|
"step": 14229 |
|
}, |
|
{ |
|
"epoch": 0.3609738192726099, |
|
"grad_norm": 1.3917667865753174, |
|
"learning_rate": 2.9743170631971368e-05, |
|
"loss": 1.6456, |
|
"step": 14260 |
|
}, |
|
{ |
|
"epoch": 0.3617585449666808, |
|
"grad_norm": 1.3452563285827637, |
|
"learning_rate": 2.9660686984769792e-05, |
|
"loss": 1.6284, |
|
"step": 14291 |
|
}, |
|
{ |
|
"epoch": 0.3625432706607517, |
|
"grad_norm": 1.421159029006958, |
|
"learning_rate": 2.9578150742349047e-05, |
|
"loss": 1.6232, |
|
"step": 14322 |
|
}, |
|
{ |
|
"epoch": 0.3633279963548226, |
|
"grad_norm": 1.4312077760696411, |
|
"learning_rate": 2.949556283611942e-05, |
|
"loss": 1.6006, |
|
"step": 14353 |
|
}, |
|
{ |
|
"epoch": 0.3641127220488935, |
|
"grad_norm": 1.4271692037582397, |
|
"learning_rate": 2.9412924198074206e-05, |
|
"loss": 1.6177, |
|
"step": 14384 |
|
}, |
|
{ |
|
"epoch": 0.36489744774296434, |
|
"grad_norm": 1.3584555387496948, |
|
"learning_rate": 2.9330235760779208e-05, |
|
"loss": 1.6148, |
|
"step": 14415 |
|
}, |
|
{ |
|
"epoch": 0.36568217343703524, |
|
"grad_norm": 1.3882123231887817, |
|
"learning_rate": 2.9247498457362188e-05, |
|
"loss": 1.6327, |
|
"step": 14446 |
|
}, |
|
{ |
|
"epoch": 0.36646689913110614, |
|
"grad_norm": 1.540114402770996, |
|
"learning_rate": 2.9164713221502373e-05, |
|
"loss": 1.6052, |
|
"step": 14477 |
|
}, |
|
{ |
|
"epoch": 0.36725162482517704, |
|
"grad_norm": 1.3554641008377075, |
|
"learning_rate": 2.9081880987419912e-05, |
|
"loss": 1.6091, |
|
"step": 14508 |
|
}, |
|
{ |
|
"epoch": 0.36803635051924793, |
|
"grad_norm": 1.3693712949752808, |
|
"learning_rate": 2.8999002689865296e-05, |
|
"loss": 1.5936, |
|
"step": 14539 |
|
}, |
|
{ |
|
"epoch": 0.36882107621331883, |
|
"grad_norm": 1.354278564453125, |
|
"learning_rate": 2.8916079264108852e-05, |
|
"loss": 1.612, |
|
"step": 14570 |
|
}, |
|
{ |
|
"epoch": 0.36960580190738973, |
|
"grad_norm": 1.3731021881103516, |
|
"learning_rate": 2.883311164593017e-05, |
|
"loss": 1.6064, |
|
"step": 14601 |
|
}, |
|
{ |
|
"epoch": 0.37039052760146063, |
|
"grad_norm": 1.3914356231689453, |
|
"learning_rate": 2.875010077160754e-05, |
|
"loss": 1.6036, |
|
"step": 14632 |
|
}, |
|
{ |
|
"epoch": 0.37117525329553147, |
|
"grad_norm": 1.4811164140701294, |
|
"learning_rate": 2.866704757790741e-05, |
|
"loss": 1.6195, |
|
"step": 14663 |
|
}, |
|
{ |
|
"epoch": 0.37195997898960237, |
|
"grad_norm": 1.4619332551956177, |
|
"learning_rate": 2.858395300207376e-05, |
|
"loss": 1.6315, |
|
"step": 14694 |
|
}, |
|
{ |
|
"epoch": 0.37274470468367327, |
|
"grad_norm": 1.456950306892395, |
|
"learning_rate": 2.8500817981817607e-05, |
|
"loss": 1.6276, |
|
"step": 14725 |
|
}, |
|
{ |
|
"epoch": 0.37352943037774416, |
|
"grad_norm": 5.129410266876221, |
|
"learning_rate": 2.8417643455306336e-05, |
|
"loss": 1.6234, |
|
"step": 14756 |
|
}, |
|
{ |
|
"epoch": 0.37431415607181506, |
|
"grad_norm": 1.3831191062927246, |
|
"learning_rate": 2.8334430361153185e-05, |
|
"loss": 1.6163, |
|
"step": 14787 |
|
}, |
|
{ |
|
"epoch": 0.37509888176588596, |
|
"grad_norm": 1.3817623853683472, |
|
"learning_rate": 2.8251179638406612e-05, |
|
"loss": 1.6206, |
|
"step": 14818 |
|
}, |
|
{ |
|
"epoch": 0.37588360745995686, |
|
"grad_norm": 1.5285260677337646, |
|
"learning_rate": 2.8167892226539704e-05, |
|
"loss": 1.6117, |
|
"step": 14849 |
|
}, |
|
{ |
|
"epoch": 0.37666833315402776, |
|
"grad_norm": 1.403324007987976, |
|
"learning_rate": 2.8084569065439588e-05, |
|
"loss": 1.5962, |
|
"step": 14880 |
|
}, |
|
{ |
|
"epoch": 0.3774530588480986, |
|
"grad_norm": 1.3314014673233032, |
|
"learning_rate": 2.8001211095396807e-05, |
|
"loss": 1.6116, |
|
"step": 14911 |
|
}, |
|
{ |
|
"epoch": 0.3782377845421695, |
|
"grad_norm": 1.4300462007522583, |
|
"learning_rate": 2.791781925709473e-05, |
|
"loss": 1.6234, |
|
"step": 14942 |
|
}, |
|
{ |
|
"epoch": 0.3790225102362404, |
|
"grad_norm": 1.424811601638794, |
|
"learning_rate": 2.7834394491598908e-05, |
|
"loss": 1.5986, |
|
"step": 14973 |
|
}, |
|
{ |
|
"epoch": 0.3798072359303113, |
|
"grad_norm": 1.3818182945251465, |
|
"learning_rate": 2.7750937740346485e-05, |
|
"loss": 1.6012, |
|
"step": 15004 |
|
}, |
|
{ |
|
"epoch": 0.3805919616243822, |
|
"grad_norm": 1.4053683280944824, |
|
"learning_rate": 2.7667449945135564e-05, |
|
"loss": 1.6018, |
|
"step": 15035 |
|
}, |
|
{ |
|
"epoch": 0.3813766873184531, |
|
"grad_norm": 1.5093421936035156, |
|
"learning_rate": 2.7583932048114557e-05, |
|
"loss": 1.61, |
|
"step": 15066 |
|
}, |
|
{ |
|
"epoch": 0.382161413012524, |
|
"grad_norm": 1.412494421005249, |
|
"learning_rate": 2.7500384991771587e-05, |
|
"loss": 1.613, |
|
"step": 15097 |
|
}, |
|
{ |
|
"epoch": 0.3829461387065949, |
|
"grad_norm": 1.335167646408081, |
|
"learning_rate": 2.7416809718923825e-05, |
|
"loss": 1.6197, |
|
"step": 15128 |
|
}, |
|
{ |
|
"epoch": 0.3837308644006657, |
|
"grad_norm": 1.334786295890808, |
|
"learning_rate": 2.7333207172706864e-05, |
|
"loss": 1.6284, |
|
"step": 15159 |
|
}, |
|
{ |
|
"epoch": 0.3845155900947366, |
|
"grad_norm": 1.4039522409439087, |
|
"learning_rate": 2.7249578296564088e-05, |
|
"loss": 1.5889, |
|
"step": 15190 |
|
}, |
|
{ |
|
"epoch": 0.3853003157888075, |
|
"grad_norm": 1.4196487665176392, |
|
"learning_rate": 2.7165924034235973e-05, |
|
"loss": 1.6132, |
|
"step": 15221 |
|
}, |
|
{ |
|
"epoch": 0.3860850414828784, |
|
"grad_norm": 1.4701744318008423, |
|
"learning_rate": 2.708224532974953e-05, |
|
"loss": 1.6009, |
|
"step": 15252 |
|
}, |
|
{ |
|
"epoch": 0.3868697671769493, |
|
"grad_norm": 1.319935917854309, |
|
"learning_rate": 2.6998543127407538e-05, |
|
"loss": 1.6333, |
|
"step": 15283 |
|
}, |
|
{ |
|
"epoch": 0.3876544928710202, |
|
"grad_norm": 1.3962234258651733, |
|
"learning_rate": 2.6914818371777988e-05, |
|
"loss": 1.6175, |
|
"step": 15314 |
|
}, |
|
{ |
|
"epoch": 0.3884392185650911, |
|
"grad_norm": 1.4284230470657349, |
|
"learning_rate": 2.6831072007683373e-05, |
|
"loss": 1.6007, |
|
"step": 15345 |
|
}, |
|
{ |
|
"epoch": 0.389223944259162, |
|
"grad_norm": 1.298251748085022, |
|
"learning_rate": 2.6747304980190018e-05, |
|
"loss": 1.605, |
|
"step": 15376 |
|
}, |
|
{ |
|
"epoch": 0.39000866995323286, |
|
"grad_norm": 1.294994831085205, |
|
"learning_rate": 2.6663518234597453e-05, |
|
"loss": 1.6025, |
|
"step": 15407 |
|
}, |
|
{ |
|
"epoch": 0.39079339564730375, |
|
"grad_norm": 1.440958023071289, |
|
"learning_rate": 2.6579712716427696e-05, |
|
"loss": 1.6002, |
|
"step": 15438 |
|
}, |
|
{ |
|
"epoch": 0.39157812134137465, |
|
"grad_norm": 1.439590573310852, |
|
"learning_rate": 2.6495889371414652e-05, |
|
"loss": 1.6025, |
|
"step": 15469 |
|
}, |
|
{ |
|
"epoch": 0.39236284703544555, |
|
"grad_norm": 1.4235502481460571, |
|
"learning_rate": 2.6412049145493367e-05, |
|
"loss": 1.5993, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.39314757272951645, |
|
"grad_norm": 1.4449518918991089, |
|
"learning_rate": 2.632819298478939e-05, |
|
"loss": 1.63, |
|
"step": 15531 |
|
}, |
|
{ |
|
"epoch": 0.39393229842358735, |
|
"grad_norm": 1.4422321319580078, |
|
"learning_rate": 2.6244321835608105e-05, |
|
"loss": 1.6193, |
|
"step": 15562 |
|
}, |
|
{ |
|
"epoch": 0.39471702411765824, |
|
"grad_norm": 1.4232275485992432, |
|
"learning_rate": 2.6160436644424024e-05, |
|
"loss": 1.6193, |
|
"step": 15593 |
|
}, |
|
{ |
|
"epoch": 0.39550174981172914, |
|
"grad_norm": 1.5187265872955322, |
|
"learning_rate": 2.6076538357870133e-05, |
|
"loss": 1.618, |
|
"step": 15624 |
|
}, |
|
{ |
|
"epoch": 0.3962864755058, |
|
"grad_norm": 1.4493205547332764, |
|
"learning_rate": 2.5992627922727196e-05, |
|
"loss": 1.6082, |
|
"step": 15655 |
|
}, |
|
{ |
|
"epoch": 0.3970712011998709, |
|
"grad_norm": 1.5100423097610474, |
|
"learning_rate": 2.5908706285913066e-05, |
|
"loss": 1.6081, |
|
"step": 15686 |
|
}, |
|
{ |
|
"epoch": 0.3978559268939418, |
|
"grad_norm": 1.465114712715149, |
|
"learning_rate": 2.5824774394472008e-05, |
|
"loss": 1.6125, |
|
"step": 15717 |
|
}, |
|
{ |
|
"epoch": 0.3986406525880127, |
|
"grad_norm": 1.4160761833190918, |
|
"learning_rate": 2.5740833195563996e-05, |
|
"loss": 1.5951, |
|
"step": 15748 |
|
}, |
|
{ |
|
"epoch": 0.3994253782820836, |
|
"grad_norm": 1.381658673286438, |
|
"learning_rate": 2.5656883636454067e-05, |
|
"loss": 1.6051, |
|
"step": 15779 |
|
}, |
|
{ |
|
"epoch": 0.4002101039761545, |
|
"grad_norm": 1.3883142471313477, |
|
"learning_rate": 2.557292666450159e-05, |
|
"loss": 1.6039, |
|
"step": 15810 |
|
}, |
|
{ |
|
"epoch": 0.4009948296702254, |
|
"grad_norm": 1.506911039352417, |
|
"learning_rate": 2.5488963227149566e-05, |
|
"loss": 1.5761, |
|
"step": 15841 |
|
}, |
|
{ |
|
"epoch": 0.40177955536429627, |
|
"grad_norm": 1.4450113773345947, |
|
"learning_rate": 2.5404994271913983e-05, |
|
"loss": 1.5734, |
|
"step": 15872 |
|
}, |
|
{ |
|
"epoch": 0.4025642810583671, |
|
"grad_norm": 1.3970619440078735, |
|
"learning_rate": 2.5321020746373085e-05, |
|
"loss": 1.6094, |
|
"step": 15903 |
|
}, |
|
{ |
|
"epoch": 0.403349006752438, |
|
"grad_norm": 1.4761073589324951, |
|
"learning_rate": 2.52370435981567e-05, |
|
"loss": 1.6075, |
|
"step": 15934 |
|
}, |
|
{ |
|
"epoch": 0.4041337324465089, |
|
"grad_norm": 1.3969392776489258, |
|
"learning_rate": 2.5153063774935533e-05, |
|
"loss": 1.5788, |
|
"step": 15965 |
|
}, |
|
{ |
|
"epoch": 0.4049184581405798, |
|
"grad_norm": 1.3772737979888916, |
|
"learning_rate": 2.506908222441045e-05, |
|
"loss": 1.61, |
|
"step": 15996 |
|
}, |
|
{ |
|
"epoch": 0.4057031838346507, |
|
"grad_norm": 1.3969396352767944, |
|
"learning_rate": 2.498509989430187e-05, |
|
"loss": 1.5943, |
|
"step": 16027 |
|
}, |
|
{ |
|
"epoch": 0.4064879095287216, |
|
"grad_norm": 1.3052096366882324, |
|
"learning_rate": 2.4901117732338958e-05, |
|
"loss": 1.61, |
|
"step": 16058 |
|
}, |
|
{ |
|
"epoch": 0.4072726352227925, |
|
"grad_norm": 1.394612193107605, |
|
"learning_rate": 2.481713668624899e-05, |
|
"loss": 1.6018, |
|
"step": 16089 |
|
}, |
|
{ |
|
"epoch": 0.4080573609168634, |
|
"grad_norm": 1.3575886487960815, |
|
"learning_rate": 2.4733157703746663e-05, |
|
"loss": 1.5883, |
|
"step": 16120 |
|
}, |
|
{ |
|
"epoch": 0.40884208661093424, |
|
"grad_norm": 1.3952176570892334, |
|
"learning_rate": 2.4649181732523392e-05, |
|
"loss": 1.6152, |
|
"step": 16151 |
|
}, |
|
{ |
|
"epoch": 0.40962681230500514, |
|
"grad_norm": 1.5711455345153809, |
|
"learning_rate": 2.4565209720236582e-05, |
|
"loss": 1.61, |
|
"step": 16182 |
|
}, |
|
{ |
|
"epoch": 0.41041153799907604, |
|
"grad_norm": 1.5258722305297852, |
|
"learning_rate": 2.4481242614498975e-05, |
|
"loss": 1.628, |
|
"step": 16213 |
|
}, |
|
{ |
|
"epoch": 0.41119626369314694, |
|
"grad_norm": 1.425764799118042, |
|
"learning_rate": 2.439728136286796e-05, |
|
"loss": 1.5872, |
|
"step": 16244 |
|
}, |
|
{ |
|
"epoch": 0.41198098938721783, |
|
"grad_norm": 1.3165446519851685, |
|
"learning_rate": 2.4313326912834852e-05, |
|
"loss": 1.6008, |
|
"step": 16275 |
|
}, |
|
{ |
|
"epoch": 0.41276571508128873, |
|
"grad_norm": 1.386579155921936, |
|
"learning_rate": 2.4229380211814206e-05, |
|
"loss": 1.5783, |
|
"step": 16306 |
|
}, |
|
{ |
|
"epoch": 0.41355044077535963, |
|
"grad_norm": 1.464693307876587, |
|
"learning_rate": 2.4145442207133124e-05, |
|
"loss": 1.5947, |
|
"step": 16337 |
|
}, |
|
{ |
|
"epoch": 0.4143351664694305, |
|
"grad_norm": 1.334782600402832, |
|
"learning_rate": 2.406151384602059e-05, |
|
"loss": 1.5886, |
|
"step": 16368 |
|
}, |
|
{ |
|
"epoch": 0.41511989216350137, |
|
"grad_norm": 1.4115489721298218, |
|
"learning_rate": 2.3977596075596747e-05, |
|
"loss": 1.5821, |
|
"step": 16399 |
|
}, |
|
{ |
|
"epoch": 0.41590461785757227, |
|
"grad_norm": 1.391065001487732, |
|
"learning_rate": 2.3893689842862223e-05, |
|
"loss": 1.6141, |
|
"step": 16430 |
|
}, |
|
{ |
|
"epoch": 0.41668934355164317, |
|
"grad_norm": 1.4244657754898071, |
|
"learning_rate": 2.3809796094687475e-05, |
|
"loss": 1.6008, |
|
"step": 16461 |
|
}, |
|
{ |
|
"epoch": 0.41747406924571406, |
|
"grad_norm": 1.3113791942596436, |
|
"learning_rate": 2.372591577780202e-05, |
|
"loss": 1.608, |
|
"step": 16492 |
|
}, |
|
{ |
|
"epoch": 0.41825879493978496, |
|
"grad_norm": 1.4262186288833618, |
|
"learning_rate": 2.3642049838783838e-05, |
|
"loss": 1.5801, |
|
"step": 16523 |
|
}, |
|
{ |
|
"epoch": 0.41904352063385586, |
|
"grad_norm": 1.4219175577163696, |
|
"learning_rate": 2.3558199224048666e-05, |
|
"loss": 1.592, |
|
"step": 16554 |
|
}, |
|
{ |
|
"epoch": 0.41982824632792676, |
|
"grad_norm": 1.4542045593261719, |
|
"learning_rate": 2.347436487983929e-05, |
|
"loss": 1.6062, |
|
"step": 16585 |
|
}, |
|
{ |
|
"epoch": 0.42061297202199766, |
|
"grad_norm": 1.4484211206436157, |
|
"learning_rate": 2.3390547752214888e-05, |
|
"loss": 1.6042, |
|
"step": 16616 |
|
}, |
|
{ |
|
"epoch": 0.4213976977160685, |
|
"grad_norm": 1.4561681747436523, |
|
"learning_rate": 2.330674878704035e-05, |
|
"loss": 1.617, |
|
"step": 16647 |
|
}, |
|
{ |
|
"epoch": 0.4221824234101394, |
|
"grad_norm": 1.4250808954238892, |
|
"learning_rate": 2.322296892997561e-05, |
|
"loss": 1.5947, |
|
"step": 16678 |
|
}, |
|
{ |
|
"epoch": 0.4229671491042103, |
|
"grad_norm": 1.3762766122817993, |
|
"learning_rate": 2.313920912646497e-05, |
|
"loss": 1.5962, |
|
"step": 16709 |
|
}, |
|
{ |
|
"epoch": 0.4237518747982812, |
|
"grad_norm": 1.3508645296096802, |
|
"learning_rate": 2.305547032172643e-05, |
|
"loss": 1.5969, |
|
"step": 16740 |
|
}, |
|
{ |
|
"epoch": 0.4245366004923521, |
|
"grad_norm": 1.4839844703674316, |
|
"learning_rate": 2.2971753460741014e-05, |
|
"loss": 1.5697, |
|
"step": 16771 |
|
}, |
|
{ |
|
"epoch": 0.425321326186423, |
|
"grad_norm": 1.4027475118637085, |
|
"learning_rate": 2.288805948824212e-05, |
|
"loss": 1.5758, |
|
"step": 16802 |
|
}, |
|
{ |
|
"epoch": 0.4261060518804939, |
|
"grad_norm": 1.3288599252700806, |
|
"learning_rate": 2.2804389348704858e-05, |
|
"loss": 1.5817, |
|
"step": 16833 |
|
}, |
|
{ |
|
"epoch": 0.4268907775745648, |
|
"grad_norm": 1.411028265953064, |
|
"learning_rate": 2.2720743986335374e-05, |
|
"loss": 1.6059, |
|
"step": 16864 |
|
}, |
|
{ |
|
"epoch": 0.4276755032686356, |
|
"grad_norm": 1.4803740978240967, |
|
"learning_rate": 2.2637124345060233e-05, |
|
"loss": 1.6061, |
|
"step": 16895 |
|
}, |
|
{ |
|
"epoch": 0.4284602289627065, |
|
"grad_norm": 1.6195276975631714, |
|
"learning_rate": 2.2553531368515695e-05, |
|
"loss": 1.5948, |
|
"step": 16926 |
|
}, |
|
{ |
|
"epoch": 0.4292449546567774, |
|
"grad_norm": 1.368160605430603, |
|
"learning_rate": 2.2469966000037144e-05, |
|
"loss": 1.5884, |
|
"step": 16957 |
|
}, |
|
{ |
|
"epoch": 0.4300296803508483, |
|
"grad_norm": 2.9462714195251465, |
|
"learning_rate": 2.2386429182648417e-05, |
|
"loss": 1.5834, |
|
"step": 16988 |
|
}, |
|
{ |
|
"epoch": 0.4308144060449192, |
|
"grad_norm": 1.319602370262146, |
|
"learning_rate": 2.230292185905114e-05, |
|
"loss": 1.571, |
|
"step": 17019 |
|
}, |
|
{ |
|
"epoch": 0.4315991317389901, |
|
"grad_norm": 1.412001371383667, |
|
"learning_rate": 2.2219444971614116e-05, |
|
"loss": 1.6091, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 0.432383857433061, |
|
"grad_norm": 1.4459586143493652, |
|
"learning_rate": 2.2135999462362655e-05, |
|
"loss": 1.5803, |
|
"step": 17081 |
|
}, |
|
{ |
|
"epoch": 0.4331685831271319, |
|
"grad_norm": 1.3342795372009277, |
|
"learning_rate": 2.2052586272968003e-05, |
|
"loss": 1.5809, |
|
"step": 17112 |
|
}, |
|
{ |
|
"epoch": 0.43395330882120275, |
|
"grad_norm": 1.3263877630233765, |
|
"learning_rate": 2.196920634473666e-05, |
|
"loss": 1.5742, |
|
"step": 17143 |
|
}, |
|
{ |
|
"epoch": 0.43473803451527365, |
|
"grad_norm": 1.3818809986114502, |
|
"learning_rate": 2.1885860618599787e-05, |
|
"loss": 1.5701, |
|
"step": 17174 |
|
}, |
|
{ |
|
"epoch": 0.43552276020934455, |
|
"grad_norm": 1.4324009418487549, |
|
"learning_rate": 2.1802550035102577e-05, |
|
"loss": 1.5622, |
|
"step": 17205 |
|
}, |
|
{ |
|
"epoch": 0.43630748590341545, |
|
"grad_norm": 1.3489223718643188, |
|
"learning_rate": 2.171927553439363e-05, |
|
"loss": 1.5737, |
|
"step": 17236 |
|
}, |
|
{ |
|
"epoch": 0.43709221159748635, |
|
"grad_norm": 1.6844401359558105, |
|
"learning_rate": 2.1636038056214376e-05, |
|
"loss": 1.5916, |
|
"step": 17267 |
|
}, |
|
{ |
|
"epoch": 0.43787693729155724, |
|
"grad_norm": 1.3632712364196777, |
|
"learning_rate": 2.155283853988844e-05, |
|
"loss": 1.6055, |
|
"step": 17298 |
|
}, |
|
{ |
|
"epoch": 0.43866166298562814, |
|
"grad_norm": 1.4866870641708374, |
|
"learning_rate": 2.146967792431106e-05, |
|
"loss": 1.5858, |
|
"step": 17329 |
|
}, |
|
{ |
|
"epoch": 0.43944638867969904, |
|
"grad_norm": 1.5456846952438354, |
|
"learning_rate": 2.138655714793849e-05, |
|
"loss": 1.6098, |
|
"step": 17360 |
|
}, |
|
{ |
|
"epoch": 0.44023111437376994, |
|
"grad_norm": 1.4177597761154175, |
|
"learning_rate": 2.1303477148777367e-05, |
|
"loss": 1.5833, |
|
"step": 17391 |
|
}, |
|
{ |
|
"epoch": 0.4410158400678408, |
|
"grad_norm": 1.4126933813095093, |
|
"learning_rate": 2.122043886437421e-05, |
|
"loss": 1.599, |
|
"step": 17422 |
|
}, |
|
{ |
|
"epoch": 0.4418005657619117, |
|
"grad_norm": 1.4183374643325806, |
|
"learning_rate": 2.1137443231804765e-05, |
|
"loss": 1.5941, |
|
"step": 17453 |
|
}, |
|
{ |
|
"epoch": 0.4425852914559826, |
|
"grad_norm": 1.4230761528015137, |
|
"learning_rate": 2.105449118766347e-05, |
|
"loss": 1.5743, |
|
"step": 17484 |
|
}, |
|
{ |
|
"epoch": 0.4433700171500535, |
|
"grad_norm": 1.6844847202301025, |
|
"learning_rate": 2.097158366805287e-05, |
|
"loss": 1.5672, |
|
"step": 17515 |
|
}, |
|
{ |
|
"epoch": 0.4441547428441244, |
|
"grad_norm": 1.410435438156128, |
|
"learning_rate": 2.0888721608573047e-05, |
|
"loss": 1.5896, |
|
"step": 17546 |
|
}, |
|
{ |
|
"epoch": 0.44493946853819527, |
|
"grad_norm": 1.3948931694030762, |
|
"learning_rate": 2.0805905944311087e-05, |
|
"loss": 1.5899, |
|
"step": 17577 |
|
}, |
|
{ |
|
"epoch": 0.44572419423226617, |
|
"grad_norm": 1.3747113943099976, |
|
"learning_rate": 2.0723137609830497e-05, |
|
"loss": 1.5576, |
|
"step": 17608 |
|
}, |
|
{ |
|
"epoch": 0.44650891992633707, |
|
"grad_norm": 1.477161169052124, |
|
"learning_rate": 2.0640417539160686e-05, |
|
"loss": 1.5576, |
|
"step": 17639 |
|
}, |
|
{ |
|
"epoch": 0.4472936456204079, |
|
"grad_norm": 1.372091293334961, |
|
"learning_rate": 2.0557746665786427e-05, |
|
"loss": 1.5958, |
|
"step": 17670 |
|
}, |
|
{ |
|
"epoch": 0.4480783713144788, |
|
"grad_norm": 1.361820936203003, |
|
"learning_rate": 2.0475125922637256e-05, |
|
"loss": 1.5917, |
|
"step": 17701 |
|
}, |
|
{ |
|
"epoch": 0.4488630970085497, |
|
"grad_norm": 1.367297887802124, |
|
"learning_rate": 2.0392556242077047e-05, |
|
"loss": 1.5965, |
|
"step": 17732 |
|
}, |
|
{ |
|
"epoch": 0.4496478227026206, |
|
"grad_norm": 1.538565754890442, |
|
"learning_rate": 2.031003855589343e-05, |
|
"loss": 1.5814, |
|
"step": 17763 |
|
}, |
|
{ |
|
"epoch": 0.4504325483966915, |
|
"grad_norm": 1.4618374109268188, |
|
"learning_rate": 2.022757379528727e-05, |
|
"loss": 1.5852, |
|
"step": 17794 |
|
}, |
|
{ |
|
"epoch": 0.4512172740907624, |
|
"grad_norm": 1.3954309225082397, |
|
"learning_rate": 2.0145162890862184e-05, |
|
"loss": 1.5576, |
|
"step": 17825 |
|
}, |
|
{ |
|
"epoch": 0.4520019997848333, |
|
"grad_norm": 1.33854079246521, |
|
"learning_rate": 2.0062806772614022e-05, |
|
"loss": 1.5793, |
|
"step": 17856 |
|
}, |
|
{ |
|
"epoch": 0.4527867254789042, |
|
"grad_norm": 1.4751428365707397, |
|
"learning_rate": 1.9980506369920392e-05, |
|
"loss": 1.5831, |
|
"step": 17887 |
|
}, |
|
{ |
|
"epoch": 0.45357145117297504, |
|
"grad_norm": 1.3836451768875122, |
|
"learning_rate": 1.989826261153015e-05, |
|
"loss": 1.5967, |
|
"step": 17918 |
|
}, |
|
{ |
|
"epoch": 0.45435617686704594, |
|
"grad_norm": 1.4987123012542725, |
|
"learning_rate": 1.9816076425552923e-05, |
|
"loss": 1.5953, |
|
"step": 17949 |
|
}, |
|
{ |
|
"epoch": 0.45514090256111683, |
|
"grad_norm": 1.3838002681732178, |
|
"learning_rate": 1.9733948739448676e-05, |
|
"loss": 1.5614, |
|
"step": 17980 |
|
}, |
|
{ |
|
"epoch": 0.45592562825518773, |
|
"grad_norm": 1.358023762702942, |
|
"learning_rate": 1.9651880480017155e-05, |
|
"loss": 1.5737, |
|
"step": 18011 |
|
}, |
|
{ |
|
"epoch": 0.45671035394925863, |
|
"grad_norm": 1.3181227445602417, |
|
"learning_rate": 1.9569872573387516e-05, |
|
"loss": 1.5806, |
|
"step": 18042 |
|
}, |
|
{ |
|
"epoch": 0.4574950796433295, |
|
"grad_norm": 1.3574905395507812, |
|
"learning_rate": 1.9487925945007854e-05, |
|
"loss": 1.5779, |
|
"step": 18073 |
|
}, |
|
{ |
|
"epoch": 0.4582798053374004, |
|
"grad_norm": 1.3550188541412354, |
|
"learning_rate": 1.9406041519634726e-05, |
|
"loss": 1.5723, |
|
"step": 18104 |
|
}, |
|
{ |
|
"epoch": 0.4590645310314713, |
|
"grad_norm": 1.3672763109207153, |
|
"learning_rate": 1.932422022132275e-05, |
|
"loss": 1.5869, |
|
"step": 18135 |
|
}, |
|
{ |
|
"epoch": 0.45984925672554217, |
|
"grad_norm": 1.428689956665039, |
|
"learning_rate": 1.924246297341414e-05, |
|
"loss": 1.5743, |
|
"step": 18166 |
|
}, |
|
{ |
|
"epoch": 0.46063398241961306, |
|
"grad_norm": 1.3313350677490234, |
|
"learning_rate": 1.9160770698528338e-05, |
|
"loss": 1.5836, |
|
"step": 18197 |
|
}, |
|
{ |
|
"epoch": 0.46141870811368396, |
|
"grad_norm": 1.3049378395080566, |
|
"learning_rate": 1.907914431855156e-05, |
|
"loss": 1.5753, |
|
"step": 18228 |
|
}, |
|
{ |
|
"epoch": 0.46220343380775486, |
|
"grad_norm": 1.3737244606018066, |
|
"learning_rate": 1.8997584754626412e-05, |
|
"loss": 1.589, |
|
"step": 18259 |
|
}, |
|
{ |
|
"epoch": 0.46298815950182576, |
|
"grad_norm": 1.4522390365600586, |
|
"learning_rate": 1.8916092927141486e-05, |
|
"loss": 1.5898, |
|
"step": 18290 |
|
} |
|
], |
|
"logging_steps": 31, |
|
"max_steps": 30517, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 3052, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.3583670324133626e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|