|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.6953175924109432, |
|
"eval_steps": 500, |
|
"global_step": 27468, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0007847256940708912, |
|
"grad_norm": 26.94572639465332, |
|
"learning_rate": 1.0157273918741808e-06, |
|
"loss": 8.5879, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.0015694513881417823, |
|
"grad_norm": 14.633014678955078, |
|
"learning_rate": 2.0314547837483616e-06, |
|
"loss": 7.5048, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.002354177082212673, |
|
"grad_norm": 15.984803199768066, |
|
"learning_rate": 3.0471821756225426e-06, |
|
"loss": 6.1391, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.0031389027762835646, |
|
"grad_norm": 11.297175407409668, |
|
"learning_rate": 4.062909567496723e-06, |
|
"loss": 4.9299, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.003923628470354455, |
|
"grad_norm": 14.864474296569824, |
|
"learning_rate": 5.078636959370905e-06, |
|
"loss": 4.3205, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.004708354164425346, |
|
"grad_norm": 11.237608909606934, |
|
"learning_rate": 6.094364351245085e-06, |
|
"loss": 4.0, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.005493079858496238, |
|
"grad_norm": 23.79303550720215, |
|
"learning_rate": 7.110091743119267e-06, |
|
"loss": 3.7952, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.006277805552567129, |
|
"grad_norm": 15.1551513671875, |
|
"learning_rate": 8.125819134993446e-06, |
|
"loss": 3.689, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.00706253124663802, |
|
"grad_norm": 14.605571746826172, |
|
"learning_rate": 9.141546526867629e-06, |
|
"loss": 3.5147, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.00784725694070891, |
|
"grad_norm": 16.463390350341797, |
|
"learning_rate": 1.015727391874181e-05, |
|
"loss": 3.3901, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.008631982634779801, |
|
"grad_norm": 13.09945011138916, |
|
"learning_rate": 1.117300131061599e-05, |
|
"loss": 3.317, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.009416708328850693, |
|
"grad_norm": 11.993067741394043, |
|
"learning_rate": 1.218872870249017e-05, |
|
"loss": 3.2508, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.010201434022921584, |
|
"grad_norm": 10.388030052185059, |
|
"learning_rate": 1.3204456094364351e-05, |
|
"loss": 3.1239, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.010986159716992476, |
|
"grad_norm": 11.977804183959961, |
|
"learning_rate": 1.4220183486238533e-05, |
|
"loss": 3.0739, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.011770885411063367, |
|
"grad_norm": 8.925983428955078, |
|
"learning_rate": 1.5235910878112714e-05, |
|
"loss": 3.0169, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.012555611105134258, |
|
"grad_norm": 9.57411003112793, |
|
"learning_rate": 1.6251638269986893e-05, |
|
"loss": 2.959, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.01334033679920515, |
|
"grad_norm": 7.380288124084473, |
|
"learning_rate": 1.7267365661861077e-05, |
|
"loss": 2.8921, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.01412506249327604, |
|
"grad_norm": 8.812368392944336, |
|
"learning_rate": 1.8283093053735257e-05, |
|
"loss": 2.843, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.014909788187346932, |
|
"grad_norm": 8.870095252990723, |
|
"learning_rate": 1.9298820445609438e-05, |
|
"loss": 2.7895, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.01569451388141782, |
|
"grad_norm": 9.503872871398926, |
|
"learning_rate": 2.031454783748362e-05, |
|
"loss": 2.7757, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.016479239575488712, |
|
"grad_norm": 6.582827568054199, |
|
"learning_rate": 2.13302752293578e-05, |
|
"loss": 2.7099, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.017263965269559603, |
|
"grad_norm": 6.266632556915283, |
|
"learning_rate": 2.234600262123198e-05, |
|
"loss": 2.6729, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.018048690963630494, |
|
"grad_norm": 6.645415306091309, |
|
"learning_rate": 2.336173001310616e-05, |
|
"loss": 2.6616, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.018833416657701385, |
|
"grad_norm": 7.8323073387146, |
|
"learning_rate": 2.437745740498034e-05, |
|
"loss": 2.6291, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.019618142351772276, |
|
"grad_norm": 5.577521324157715, |
|
"learning_rate": 2.5393184796854525e-05, |
|
"loss": 2.6072, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.020402868045843167, |
|
"grad_norm": 5.603636264801025, |
|
"learning_rate": 2.6408912188728702e-05, |
|
"loss": 2.5787, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.021187593739914058, |
|
"grad_norm": 6.945438385009766, |
|
"learning_rate": 2.7424639580602886e-05, |
|
"loss": 2.5198, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.021972319433984953, |
|
"grad_norm": 5.6279826164245605, |
|
"learning_rate": 2.8440366972477066e-05, |
|
"loss": 2.5417, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.022757045128055844, |
|
"grad_norm": 5.517001628875732, |
|
"learning_rate": 2.9456094364351244e-05, |
|
"loss": 2.4849, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.023541770822126735, |
|
"grad_norm": 5.865486145019531, |
|
"learning_rate": 3.0471821756225428e-05, |
|
"loss": 2.5103, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.024326496516197626, |
|
"grad_norm": 4.949043273925781, |
|
"learning_rate": 3.148754914809961e-05, |
|
"loss": 2.4581, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.025111222210268517, |
|
"grad_norm": 4.701717853546143, |
|
"learning_rate": 3.2503276539973785e-05, |
|
"loss": 2.4315, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.025895947904339408, |
|
"grad_norm": 4.533145904541016, |
|
"learning_rate": 3.351900393184797e-05, |
|
"loss": 2.4056, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 0.0266806735984103, |
|
"grad_norm": 4.724672794342041, |
|
"learning_rate": 3.453473132372215e-05, |
|
"loss": 2.3994, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 0.02746539929248119, |
|
"grad_norm": 4.745669364929199, |
|
"learning_rate": 3.555045871559633e-05, |
|
"loss": 2.3546, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.02825012498655208, |
|
"grad_norm": 4.4554948806762695, |
|
"learning_rate": 3.6566186107470514e-05, |
|
"loss": 2.3642, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.029034850680622972, |
|
"grad_norm": 4.4792304039001465, |
|
"learning_rate": 3.7581913499344695e-05, |
|
"loss": 2.3296, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 0.029819576374693863, |
|
"grad_norm": 3.9329679012298584, |
|
"learning_rate": 3.8597640891218876e-05, |
|
"loss": 2.3105, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 0.030604302068764754, |
|
"grad_norm": 4.338287830352783, |
|
"learning_rate": 3.9613368283093056e-05, |
|
"loss": 2.2811, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 0.03138902776283564, |
|
"grad_norm": 4.130499839782715, |
|
"learning_rate": 4.062909567496724e-05, |
|
"loss": 2.2898, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.03217375345690653, |
|
"grad_norm": 3.5664470195770264, |
|
"learning_rate": 4.164482306684142e-05, |
|
"loss": 2.2786, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 0.032958479150977424, |
|
"grad_norm": 3.642627716064453, |
|
"learning_rate": 4.26605504587156e-05, |
|
"loss": 2.2439, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 0.033743204845048315, |
|
"grad_norm": 3.7562780380249023, |
|
"learning_rate": 4.367627785058978e-05, |
|
"loss": 2.2441, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 0.034527930539119206, |
|
"grad_norm": 3.3117406368255615, |
|
"learning_rate": 4.469200524246396e-05, |
|
"loss": 2.2604, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 0.0353126562331901, |
|
"grad_norm": 3.4313724040985107, |
|
"learning_rate": 4.570773263433814e-05, |
|
"loss": 2.2069, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.03609738192726099, |
|
"grad_norm": 3.4720091819763184, |
|
"learning_rate": 4.672346002621232e-05, |
|
"loss": 2.2087, |
|
"step": 1426 |
|
}, |
|
{ |
|
"epoch": 0.03688210762133188, |
|
"grad_norm": 3.491856575012207, |
|
"learning_rate": 4.77391874180865e-05, |
|
"loss": 2.1808, |
|
"step": 1457 |
|
}, |
|
{ |
|
"epoch": 0.03766683331540277, |
|
"grad_norm": 3.3730666637420654, |
|
"learning_rate": 4.875491480996068e-05, |
|
"loss": 2.1907, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 0.03845155900947366, |
|
"grad_norm": 2.894322395324707, |
|
"learning_rate": 4.977064220183487e-05, |
|
"loss": 2.1689, |
|
"step": 1519 |
|
}, |
|
{ |
|
"epoch": 0.03923628470354455, |
|
"grad_norm": 3.195884943008423, |
|
"learning_rate": 4.9999915451558777e-05, |
|
"loss": 2.194, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.04002101039761544, |
|
"grad_norm": 3.154061794281006, |
|
"learning_rate": 4.999955597496219e-05, |
|
"loss": 2.1409, |
|
"step": 1581 |
|
}, |
|
{ |
|
"epoch": 0.040805736091686334, |
|
"grad_norm": 2.8204188346862793, |
|
"learning_rate": 4.9998914381774255e-05, |
|
"loss": 2.145, |
|
"step": 1612 |
|
}, |
|
{ |
|
"epoch": 0.041590461785757225, |
|
"grad_norm": 2.98260760307312, |
|
"learning_rate": 4.999799067923527e-05, |
|
"loss": 2.1523, |
|
"step": 1643 |
|
}, |
|
{ |
|
"epoch": 0.042375187479828116, |
|
"grad_norm": 2.917949914932251, |
|
"learning_rate": 4.999678487776908e-05, |
|
"loss": 2.1221, |
|
"step": 1674 |
|
}, |
|
{ |
|
"epoch": 0.04315991317389901, |
|
"grad_norm": 2.811469554901123, |
|
"learning_rate": 4.9995296990983006e-05, |
|
"loss": 2.1242, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 0.043944638867969905, |
|
"grad_norm": 3.067636728286743, |
|
"learning_rate": 4.999352703566763e-05, |
|
"loss": 2.1092, |
|
"step": 1736 |
|
}, |
|
{ |
|
"epoch": 0.044729364562040796, |
|
"grad_norm": 2.6231868267059326, |
|
"learning_rate": 4.999147503179668e-05, |
|
"loss": 2.1018, |
|
"step": 1767 |
|
}, |
|
{ |
|
"epoch": 0.04551409025611169, |
|
"grad_norm": 2.8247616291046143, |
|
"learning_rate": 4.998914100252672e-05, |
|
"loss": 2.074, |
|
"step": 1798 |
|
}, |
|
{ |
|
"epoch": 0.04629881595018258, |
|
"grad_norm": 2.5960075855255127, |
|
"learning_rate": 4.998652497419696e-05, |
|
"loss": 2.0824, |
|
"step": 1829 |
|
}, |
|
{ |
|
"epoch": 0.04708354164425347, |
|
"grad_norm": 2.7796943187713623, |
|
"learning_rate": 4.9983626976328927e-05, |
|
"loss": 2.0998, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.04786826733832436, |
|
"grad_norm": 2.49242901802063, |
|
"learning_rate": 4.998044704162613e-05, |
|
"loss": 2.0893, |
|
"step": 1891 |
|
}, |
|
{ |
|
"epoch": 0.04865299303239525, |
|
"grad_norm": 2.4294378757476807, |
|
"learning_rate": 4.9976985205973705e-05, |
|
"loss": 2.0617, |
|
"step": 1922 |
|
}, |
|
{ |
|
"epoch": 0.04943771872646614, |
|
"grad_norm": 2.553217649459839, |
|
"learning_rate": 4.997324150843799e-05, |
|
"loss": 2.0632, |
|
"step": 1953 |
|
}, |
|
{ |
|
"epoch": 0.050222444420537034, |
|
"grad_norm": 2.6711318492889404, |
|
"learning_rate": 4.99692159912661e-05, |
|
"loss": 2.0445, |
|
"step": 1984 |
|
}, |
|
{ |
|
"epoch": 0.051007170114607925, |
|
"grad_norm": 2.714432716369629, |
|
"learning_rate": 4.996490869988546e-05, |
|
"loss": 2.0185, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 0.051791895808678816, |
|
"grad_norm": 2.6516053676605225, |
|
"learning_rate": 4.996031968290326e-05, |
|
"loss": 2.057, |
|
"step": 2046 |
|
}, |
|
{ |
|
"epoch": 0.05257662150274971, |
|
"grad_norm": 2.4798831939697266, |
|
"learning_rate": 4.995544899210594e-05, |
|
"loss": 2.0199, |
|
"step": 2077 |
|
}, |
|
{ |
|
"epoch": 0.0533613471968206, |
|
"grad_norm": 2.5150041580200195, |
|
"learning_rate": 4.9950296682458583e-05, |
|
"loss": 2.0264, |
|
"step": 2108 |
|
}, |
|
{ |
|
"epoch": 0.05414607289089149, |
|
"grad_norm": 2.637777805328369, |
|
"learning_rate": 4.994486281210429e-05, |
|
"loss": 2.0233, |
|
"step": 2139 |
|
}, |
|
{ |
|
"epoch": 0.05493079858496238, |
|
"grad_norm": 2.330376148223877, |
|
"learning_rate": 4.9939147442363566e-05, |
|
"loss": 2.0201, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.05571552427903327, |
|
"grad_norm": 2.3436174392700195, |
|
"learning_rate": 4.9933150637733574e-05, |
|
"loss": 1.9865, |
|
"step": 2201 |
|
}, |
|
{ |
|
"epoch": 0.05650024997310416, |
|
"grad_norm": 2.7756845951080322, |
|
"learning_rate": 4.992687246588743e-05, |
|
"loss": 1.9983, |
|
"step": 2232 |
|
}, |
|
{ |
|
"epoch": 0.05728497566717505, |
|
"grad_norm": 2.1725504398345947, |
|
"learning_rate": 4.992031299767347e-05, |
|
"loss": 1.9689, |
|
"step": 2263 |
|
}, |
|
{ |
|
"epoch": 0.058069701361245944, |
|
"grad_norm": 2.2163312435150146, |
|
"learning_rate": 4.9913472307114386e-05, |
|
"loss": 1.9829, |
|
"step": 2294 |
|
}, |
|
{ |
|
"epoch": 0.058854427055316835, |
|
"grad_norm": 2.2829232215881348, |
|
"learning_rate": 4.9906350471406446e-05, |
|
"loss": 2.0142, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.059639152749387726, |
|
"grad_norm": 2.239596366882324, |
|
"learning_rate": 4.989894757091861e-05, |
|
"loss": 1.9697, |
|
"step": 2356 |
|
}, |
|
{ |
|
"epoch": 0.06042387844345862, |
|
"grad_norm": 2.2926037311553955, |
|
"learning_rate": 4.989126368919158e-05, |
|
"loss": 1.9688, |
|
"step": 2387 |
|
}, |
|
{ |
|
"epoch": 0.06120860413752951, |
|
"grad_norm": 10.08767032623291, |
|
"learning_rate": 4.988329891293693e-05, |
|
"loss": 1.9845, |
|
"step": 2418 |
|
}, |
|
{ |
|
"epoch": 0.0619933298316004, |
|
"grad_norm": 2.2427194118499756, |
|
"learning_rate": 4.987505333203608e-05, |
|
"loss": 1.9744, |
|
"step": 2449 |
|
}, |
|
{ |
|
"epoch": 0.06277805552567128, |
|
"grad_norm": 2.5111870765686035, |
|
"learning_rate": 4.9866527039539276e-05, |
|
"loss": 1.9526, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.06356278121974218, |
|
"grad_norm": 2.2100026607513428, |
|
"learning_rate": 4.9857720131664594e-05, |
|
"loss": 1.9826, |
|
"step": 2511 |
|
}, |
|
{ |
|
"epoch": 0.06434750691381307, |
|
"grad_norm": 2.2112088203430176, |
|
"learning_rate": 4.9848632707796773e-05, |
|
"loss": 1.9698, |
|
"step": 2542 |
|
}, |
|
{ |
|
"epoch": 0.06513223260788396, |
|
"grad_norm": 2.404014825820923, |
|
"learning_rate": 4.9839264870486155e-05, |
|
"loss": 1.9628, |
|
"step": 2573 |
|
}, |
|
{ |
|
"epoch": 0.06591695830195485, |
|
"grad_norm": 2.526423692703247, |
|
"learning_rate": 4.9829616725447526e-05, |
|
"loss": 1.9481, |
|
"step": 2604 |
|
}, |
|
{ |
|
"epoch": 0.06670168399602575, |
|
"grad_norm": 2.2506027221679688, |
|
"learning_rate": 4.981968838155888e-05, |
|
"loss": 1.9418, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 0.06748640969009663, |
|
"grad_norm": 2.4334371089935303, |
|
"learning_rate": 4.980947995086024e-05, |
|
"loss": 1.9423, |
|
"step": 2666 |
|
}, |
|
{ |
|
"epoch": 0.06827113538416753, |
|
"grad_norm": 2.3028314113616943, |
|
"learning_rate": 4.979899154855234e-05, |
|
"loss": 1.9391, |
|
"step": 2697 |
|
}, |
|
{ |
|
"epoch": 0.06905586107823841, |
|
"grad_norm": 2.122143030166626, |
|
"learning_rate": 4.9788223292995386e-05, |
|
"loss": 1.933, |
|
"step": 2728 |
|
}, |
|
{ |
|
"epoch": 0.06984058677230931, |
|
"grad_norm": 2.1335129737854004, |
|
"learning_rate": 4.977717530570768e-05, |
|
"loss": 1.9212, |
|
"step": 2759 |
|
}, |
|
{ |
|
"epoch": 0.0706253124663802, |
|
"grad_norm": 2.198650598526001, |
|
"learning_rate": 4.976584771136425e-05, |
|
"loss": 1.9217, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.07141003816045109, |
|
"grad_norm": 2.4985201358795166, |
|
"learning_rate": 4.975424063779547e-05, |
|
"loss": 1.9277, |
|
"step": 2821 |
|
}, |
|
{ |
|
"epoch": 0.07219476385452198, |
|
"grad_norm": 1.9877598285675049, |
|
"learning_rate": 4.974235421598557e-05, |
|
"loss": 1.9278, |
|
"step": 2852 |
|
}, |
|
{ |
|
"epoch": 0.07297948954859287, |
|
"grad_norm": 3.0082573890686035, |
|
"learning_rate": 4.973018858007122e-05, |
|
"loss": 1.9261, |
|
"step": 2883 |
|
}, |
|
{ |
|
"epoch": 0.07376421524266376, |
|
"grad_norm": 2.139742851257324, |
|
"learning_rate": 4.9717743867339963e-05, |
|
"loss": 1.9168, |
|
"step": 2914 |
|
}, |
|
{ |
|
"epoch": 0.07454894093673466, |
|
"grad_norm": 2.1748037338256836, |
|
"learning_rate": 4.9705020218228695e-05, |
|
"loss": 1.9132, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 0.07533366663080554, |
|
"grad_norm": 2.0570950508117676, |
|
"learning_rate": 4.969201777632205e-05, |
|
"loss": 1.9177, |
|
"step": 2976 |
|
}, |
|
{ |
|
"epoch": 0.07611839232487644, |
|
"grad_norm": 1.9970216751098633, |
|
"learning_rate": 4.9678736688350846e-05, |
|
"loss": 1.9105, |
|
"step": 3007 |
|
}, |
|
{ |
|
"epoch": 0.07690311801894732, |
|
"grad_norm": 1.9640527963638306, |
|
"learning_rate": 4.966517710419033e-05, |
|
"loss": 1.9084, |
|
"step": 3038 |
|
}, |
|
{ |
|
"epoch": 0.07768784371301822, |
|
"grad_norm": 2.172874927520752, |
|
"learning_rate": 4.965133917685858e-05, |
|
"loss": 1.8995, |
|
"step": 3069 |
|
}, |
|
{ |
|
"epoch": 0.0784725694070891, |
|
"grad_norm": 2.1881916522979736, |
|
"learning_rate": 4.9637223062514714e-05, |
|
"loss": 1.9019, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.07925729510116, |
|
"grad_norm": 1.975496530532837, |
|
"learning_rate": 4.962282892045718e-05, |
|
"loss": 1.8967, |
|
"step": 3131 |
|
}, |
|
{ |
|
"epoch": 0.08004202079523089, |
|
"grad_norm": 2.0970685482025146, |
|
"learning_rate": 4.9608156913121904e-05, |
|
"loss": 1.8867, |
|
"step": 3162 |
|
}, |
|
{ |
|
"epoch": 0.08082674648930178, |
|
"grad_norm": 2.096353769302368, |
|
"learning_rate": 4.959320720608049e-05, |
|
"loss": 1.8967, |
|
"step": 3193 |
|
}, |
|
{ |
|
"epoch": 0.08161147218337267, |
|
"grad_norm": 1.998336911201477, |
|
"learning_rate": 4.9577979968038354e-05, |
|
"loss": 1.8876, |
|
"step": 3224 |
|
}, |
|
{ |
|
"epoch": 0.08239619787744357, |
|
"grad_norm": 2.098055362701416, |
|
"learning_rate": 4.956247537083282e-05, |
|
"loss": 1.9, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 0.08318092357151445, |
|
"grad_norm": 2.0739505290985107, |
|
"learning_rate": 4.9546693589431145e-05, |
|
"loss": 1.8902, |
|
"step": 3286 |
|
}, |
|
{ |
|
"epoch": 0.08396564926558535, |
|
"grad_norm": 1.9556243419647217, |
|
"learning_rate": 4.9530634801928595e-05, |
|
"loss": 1.888, |
|
"step": 3317 |
|
}, |
|
{ |
|
"epoch": 0.08475037495965623, |
|
"grad_norm": 2.096874952316284, |
|
"learning_rate": 4.9514299189546395e-05, |
|
"loss": 1.8785, |
|
"step": 3348 |
|
}, |
|
{ |
|
"epoch": 0.08553510065372713, |
|
"grad_norm": 1.9407072067260742, |
|
"learning_rate": 4.949768693662973e-05, |
|
"loss": 1.8646, |
|
"step": 3379 |
|
}, |
|
{ |
|
"epoch": 0.08631982634779801, |
|
"grad_norm": 1.9928467273712158, |
|
"learning_rate": 4.948079823064559e-05, |
|
"loss": 1.8751, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.08710455204186891, |
|
"grad_norm": 1.9670037031173706, |
|
"learning_rate": 4.946363326218074e-05, |
|
"loss": 1.8831, |
|
"step": 3441 |
|
}, |
|
{ |
|
"epoch": 0.08788927773593981, |
|
"grad_norm": 1.999193787574768, |
|
"learning_rate": 4.9446192224939525e-05, |
|
"loss": 1.8605, |
|
"step": 3472 |
|
}, |
|
{ |
|
"epoch": 0.0886740034300107, |
|
"grad_norm": 1.9073724746704102, |
|
"learning_rate": 4.942847531574167e-05, |
|
"loss": 1.8576, |
|
"step": 3503 |
|
}, |
|
{ |
|
"epoch": 0.08945872912408159, |
|
"grad_norm": 2.179824113845825, |
|
"learning_rate": 4.941048273452008e-05, |
|
"loss": 1.8682, |
|
"step": 3534 |
|
}, |
|
{ |
|
"epoch": 0.09024345481815248, |
|
"grad_norm": 1.954990029335022, |
|
"learning_rate": 4.9392214684318605e-05, |
|
"loss": 1.8807, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 0.09102818051222338, |
|
"grad_norm": 1.7695640325546265, |
|
"learning_rate": 4.93736713712897e-05, |
|
"loss": 1.879, |
|
"step": 3596 |
|
}, |
|
{ |
|
"epoch": 0.09181290620629426, |
|
"grad_norm": 1.7708550691604614, |
|
"learning_rate": 4.9354853004692124e-05, |
|
"loss": 1.8677, |
|
"step": 3627 |
|
}, |
|
{ |
|
"epoch": 0.09259763190036516, |
|
"grad_norm": 1.9683934450149536, |
|
"learning_rate": 4.93357597968886e-05, |
|
"loss": 1.8595, |
|
"step": 3658 |
|
}, |
|
{ |
|
"epoch": 0.09338235759443604, |
|
"grad_norm": 2.00441312789917, |
|
"learning_rate": 4.931639196334338e-05, |
|
"loss": 1.8462, |
|
"step": 3689 |
|
}, |
|
{ |
|
"epoch": 0.09416708328850694, |
|
"grad_norm": 1.875543475151062, |
|
"learning_rate": 4.9296749722619826e-05, |
|
"loss": 1.8502, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.09495180898257782, |
|
"grad_norm": 1.932658314704895, |
|
"learning_rate": 4.9276833296377966e-05, |
|
"loss": 1.8457, |
|
"step": 3751 |
|
}, |
|
{ |
|
"epoch": 0.09573653467664872, |
|
"grad_norm": 1.9957045316696167, |
|
"learning_rate": 4.925664290937196e-05, |
|
"loss": 1.843, |
|
"step": 3782 |
|
}, |
|
{ |
|
"epoch": 0.0965212603707196, |
|
"grad_norm": 1.8579176664352417, |
|
"learning_rate": 4.9236178789447576e-05, |
|
"loss": 1.8504, |
|
"step": 3813 |
|
}, |
|
{ |
|
"epoch": 0.0973059860647905, |
|
"grad_norm": 1.9646131992340088, |
|
"learning_rate": 4.921544116753962e-05, |
|
"loss": 1.8512, |
|
"step": 3844 |
|
}, |
|
{ |
|
"epoch": 0.09809071175886139, |
|
"grad_norm": 1.8213136196136475, |
|
"learning_rate": 4.919443027766935e-05, |
|
"loss": 1.8618, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 0.09887543745293229, |
|
"grad_norm": 2.017280101776123, |
|
"learning_rate": 4.91731463569418e-05, |
|
"loss": 1.863, |
|
"step": 3906 |
|
}, |
|
{ |
|
"epoch": 0.09966016314700317, |
|
"grad_norm": 1.9125665426254272, |
|
"learning_rate": 4.915158964554312e-05, |
|
"loss": 1.8259, |
|
"step": 3937 |
|
}, |
|
{ |
|
"epoch": 0.10044488884107407, |
|
"grad_norm": 2.0414695739746094, |
|
"learning_rate": 4.912976038673786e-05, |
|
"loss": 1.8347, |
|
"step": 3968 |
|
}, |
|
{ |
|
"epoch": 0.10122961453514495, |
|
"grad_norm": 1.7705485820770264, |
|
"learning_rate": 4.9107658826866254e-05, |
|
"loss": 1.8502, |
|
"step": 3999 |
|
}, |
|
{ |
|
"epoch": 0.10201434022921585, |
|
"grad_norm": 1.8961102962493896, |
|
"learning_rate": 4.908528521534139e-05, |
|
"loss": 1.84, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.10279906592328673, |
|
"grad_norm": 1.784387230873108, |
|
"learning_rate": 4.906263980464644e-05, |
|
"loss": 1.842, |
|
"step": 4061 |
|
}, |
|
{ |
|
"epoch": 0.10358379161735763, |
|
"grad_norm": 11.229472160339355, |
|
"learning_rate": 4.903972285033178e-05, |
|
"loss": 1.8476, |
|
"step": 4092 |
|
}, |
|
{ |
|
"epoch": 0.10436851731142852, |
|
"grad_norm": 1.9657154083251953, |
|
"learning_rate": 4.901653461101213e-05, |
|
"loss": 1.8465, |
|
"step": 4123 |
|
}, |
|
{ |
|
"epoch": 0.10515324300549941, |
|
"grad_norm": 1.7702244520187378, |
|
"learning_rate": 4.8993075348363626e-05, |
|
"loss": 1.8249, |
|
"step": 4154 |
|
}, |
|
{ |
|
"epoch": 0.1059379686995703, |
|
"grad_norm": 1.8672112226486206, |
|
"learning_rate": 4.896934532712084e-05, |
|
"loss": 1.8232, |
|
"step": 4185 |
|
}, |
|
{ |
|
"epoch": 0.1067226943936412, |
|
"grad_norm": 1.7806147336959839, |
|
"learning_rate": 4.8945344815073846e-05, |
|
"loss": 1.8256, |
|
"step": 4216 |
|
}, |
|
{ |
|
"epoch": 0.10750742008771208, |
|
"grad_norm": 1.7830456495285034, |
|
"learning_rate": 4.892107408306516e-05, |
|
"loss": 1.8271, |
|
"step": 4247 |
|
}, |
|
{ |
|
"epoch": 0.10829214578178298, |
|
"grad_norm": 1.96640944480896, |
|
"learning_rate": 4.889653340498669e-05, |
|
"loss": 1.82, |
|
"step": 4278 |
|
}, |
|
{ |
|
"epoch": 0.10907687147585386, |
|
"grad_norm": 1.8224470615386963, |
|
"learning_rate": 4.8871723057776664e-05, |
|
"loss": 1.8216, |
|
"step": 4309 |
|
}, |
|
{ |
|
"epoch": 0.10986159716992476, |
|
"grad_norm": 2.5164501667022705, |
|
"learning_rate": 4.8846643321416476e-05, |
|
"loss": 1.8252, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.11064632286399564, |
|
"grad_norm": 1.7248613834381104, |
|
"learning_rate": 4.882129447892753e-05, |
|
"loss": 1.8133, |
|
"step": 4371 |
|
}, |
|
{ |
|
"epoch": 0.11143104855806654, |
|
"grad_norm": 2.060304880142212, |
|
"learning_rate": 4.8795676816368076e-05, |
|
"loss": 1.8282, |
|
"step": 4402 |
|
}, |
|
{ |
|
"epoch": 0.11221577425213743, |
|
"grad_norm": 1.8709039688110352, |
|
"learning_rate": 4.876979062282995e-05, |
|
"loss": 1.8154, |
|
"step": 4433 |
|
}, |
|
{ |
|
"epoch": 0.11300049994620832, |
|
"grad_norm": 1.7444674968719482, |
|
"learning_rate": 4.8743636190435325e-05, |
|
"loss": 1.8173, |
|
"step": 4464 |
|
}, |
|
{ |
|
"epoch": 0.11378522564027921, |
|
"grad_norm": 1.7357319593429565, |
|
"learning_rate": 4.871721381433344e-05, |
|
"loss": 1.8351, |
|
"step": 4495 |
|
}, |
|
{ |
|
"epoch": 0.1145699513343501, |
|
"grad_norm": 1.728070855140686, |
|
"learning_rate": 4.869052379269719e-05, |
|
"loss": 1.8119, |
|
"step": 4526 |
|
}, |
|
{ |
|
"epoch": 0.11535467702842099, |
|
"grad_norm": 1.742035984992981, |
|
"learning_rate": 4.866356642671985e-05, |
|
"loss": 1.7967, |
|
"step": 4557 |
|
}, |
|
{ |
|
"epoch": 0.11613940272249189, |
|
"grad_norm": 1.7010915279388428, |
|
"learning_rate": 4.8636342020611634e-05, |
|
"loss": 1.8004, |
|
"step": 4588 |
|
}, |
|
{ |
|
"epoch": 0.11692412841656277, |
|
"grad_norm": 1.6775914430618286, |
|
"learning_rate": 4.860885088159626e-05, |
|
"loss": 1.8173, |
|
"step": 4619 |
|
}, |
|
{ |
|
"epoch": 0.11770885411063367, |
|
"grad_norm": 1.9107964038848877, |
|
"learning_rate": 4.858109331990751e-05, |
|
"loss": 1.7984, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.11849357980470455, |
|
"grad_norm": 1.713429570198059, |
|
"learning_rate": 4.855306964878567e-05, |
|
"loss": 1.7967, |
|
"step": 4681 |
|
}, |
|
{ |
|
"epoch": 0.11927830549877545, |
|
"grad_norm": 1.9373931884765625, |
|
"learning_rate": 4.8524780184474084e-05, |
|
"loss": 1.8072, |
|
"step": 4712 |
|
}, |
|
{ |
|
"epoch": 0.12006303119284634, |
|
"grad_norm": 1.8975365161895752, |
|
"learning_rate": 4.8496225246215496e-05, |
|
"loss": 1.8121, |
|
"step": 4743 |
|
}, |
|
{ |
|
"epoch": 0.12084775688691723, |
|
"grad_norm": 5.285326957702637, |
|
"learning_rate": 4.8467405156248505e-05, |
|
"loss": 1.8189, |
|
"step": 4774 |
|
}, |
|
{ |
|
"epoch": 0.12163248258098812, |
|
"grad_norm": 1.7155263423919678, |
|
"learning_rate": 4.843832023980392e-05, |
|
"loss": 1.8093, |
|
"step": 4805 |
|
}, |
|
{ |
|
"epoch": 0.12241720827505902, |
|
"grad_norm": 1.726831316947937, |
|
"learning_rate": 4.840897082510106e-05, |
|
"loss": 1.7952, |
|
"step": 4836 |
|
}, |
|
{ |
|
"epoch": 0.1232019339691299, |
|
"grad_norm": 1.739639401435852, |
|
"learning_rate": 4.8379357243344084e-05, |
|
"loss": 1.8103, |
|
"step": 4867 |
|
}, |
|
{ |
|
"epoch": 0.1239866596632008, |
|
"grad_norm": 1.6978296041488647, |
|
"learning_rate": 4.8349479828718236e-05, |
|
"loss": 1.8006, |
|
"step": 4898 |
|
}, |
|
{ |
|
"epoch": 0.12477138535727168, |
|
"grad_norm": 1.7154194116592407, |
|
"learning_rate": 4.8319338918386075e-05, |
|
"loss": 1.7876, |
|
"step": 4929 |
|
}, |
|
{ |
|
"epoch": 0.12555611105134257, |
|
"grad_norm": 1.6323316097259521, |
|
"learning_rate": 4.828893485248369e-05, |
|
"loss": 1.8159, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.12634083674541347, |
|
"grad_norm": 1.641784429550171, |
|
"learning_rate": 4.825826797411682e-05, |
|
"loss": 1.7959, |
|
"step": 4991 |
|
}, |
|
{ |
|
"epoch": 0.12712556243948436, |
|
"grad_norm": 1.6947154998779297, |
|
"learning_rate": 4.822733862935702e-05, |
|
"loss": 1.7895, |
|
"step": 5022 |
|
}, |
|
{ |
|
"epoch": 0.12791028813355526, |
|
"grad_norm": 1.6331220865249634, |
|
"learning_rate": 4.819614716723775e-05, |
|
"loss": 1.7707, |
|
"step": 5053 |
|
}, |
|
{ |
|
"epoch": 0.12869501382762613, |
|
"grad_norm": 1.8207937479019165, |
|
"learning_rate": 4.8164693939750425e-05, |
|
"loss": 1.8123, |
|
"step": 5084 |
|
}, |
|
{ |
|
"epoch": 0.12947973952169703, |
|
"grad_norm": 1.6664263010025024, |
|
"learning_rate": 4.813297930184042e-05, |
|
"loss": 1.8089, |
|
"step": 5115 |
|
}, |
|
{ |
|
"epoch": 0.13026446521576793, |
|
"grad_norm": 1.9931398630142212, |
|
"learning_rate": 4.810100361140314e-05, |
|
"loss": 1.7757, |
|
"step": 5146 |
|
}, |
|
{ |
|
"epoch": 0.13104919090983883, |
|
"grad_norm": 1.839200735092163, |
|
"learning_rate": 4.8068767229279885e-05, |
|
"loss": 1.7969, |
|
"step": 5177 |
|
}, |
|
{ |
|
"epoch": 0.1318339166039097, |
|
"grad_norm": 1.781187653541565, |
|
"learning_rate": 4.8036270519253854e-05, |
|
"loss": 1.7937, |
|
"step": 5208 |
|
}, |
|
{ |
|
"epoch": 0.1326186422979806, |
|
"grad_norm": 1.7144343852996826, |
|
"learning_rate": 4.8003513848046e-05, |
|
"loss": 1.7816, |
|
"step": 5239 |
|
}, |
|
{ |
|
"epoch": 0.1334033679920515, |
|
"grad_norm": 1.6819554567337036, |
|
"learning_rate": 4.79704975853109e-05, |
|
"loss": 1.7851, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.1341880936861224, |
|
"grad_norm": 1.6748546361923218, |
|
"learning_rate": 4.793722210363262e-05, |
|
"loss": 1.7941, |
|
"step": 5301 |
|
}, |
|
{ |
|
"epoch": 0.13497281938019326, |
|
"grad_norm": 1.615569829940796, |
|
"learning_rate": 4.7903687778520414e-05, |
|
"loss": 1.7799, |
|
"step": 5332 |
|
}, |
|
{ |
|
"epoch": 0.13575754507426416, |
|
"grad_norm": 1.7959198951721191, |
|
"learning_rate": 4.7869894988404593e-05, |
|
"loss": 1.7802, |
|
"step": 5363 |
|
}, |
|
{ |
|
"epoch": 0.13654227076833506, |
|
"grad_norm": 1.598946452140808, |
|
"learning_rate": 4.783584411463221e-05, |
|
"loss": 1.7929, |
|
"step": 5394 |
|
}, |
|
{ |
|
"epoch": 0.13732699646240595, |
|
"grad_norm": 1.793511986732483, |
|
"learning_rate": 4.780153554146274e-05, |
|
"loss": 1.7591, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 0.13811172215647682, |
|
"grad_norm": 1.718671202659607, |
|
"learning_rate": 4.7766969656063766e-05, |
|
"loss": 1.7807, |
|
"step": 5456 |
|
}, |
|
{ |
|
"epoch": 0.13889644785054772, |
|
"grad_norm": 1.6548669338226318, |
|
"learning_rate": 4.773214684850662e-05, |
|
"loss": 1.775, |
|
"step": 5487 |
|
}, |
|
{ |
|
"epoch": 0.13968117354461862, |
|
"grad_norm": 1.6727256774902344, |
|
"learning_rate": 4.769706751176193e-05, |
|
"loss": 1.7756, |
|
"step": 5518 |
|
}, |
|
{ |
|
"epoch": 0.14046589923868952, |
|
"grad_norm": 1.7169344425201416, |
|
"learning_rate": 4.7661732041695264e-05, |
|
"loss": 1.7887, |
|
"step": 5549 |
|
}, |
|
{ |
|
"epoch": 0.1412506249327604, |
|
"grad_norm": 1.6376421451568604, |
|
"learning_rate": 4.762614083706258e-05, |
|
"loss": 1.7939, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.14203535062683129, |
|
"grad_norm": 1.7083207368850708, |
|
"learning_rate": 4.759029429950581e-05, |
|
"loss": 1.7705, |
|
"step": 5611 |
|
}, |
|
{ |
|
"epoch": 0.14282007632090218, |
|
"grad_norm": 1.6359349489212036, |
|
"learning_rate": 4.7554192833548235e-05, |
|
"loss": 1.7732, |
|
"step": 5642 |
|
}, |
|
{ |
|
"epoch": 0.14360480201497308, |
|
"grad_norm": 1.684005618095398, |
|
"learning_rate": 4.751783684659e-05, |
|
"loss": 1.7766, |
|
"step": 5673 |
|
}, |
|
{ |
|
"epoch": 0.14438952770904395, |
|
"grad_norm": 1.7531359195709229, |
|
"learning_rate": 4.748122674890348e-05, |
|
"loss": 1.7815, |
|
"step": 5704 |
|
}, |
|
{ |
|
"epoch": 0.14517425340311485, |
|
"grad_norm": 1.5898247957229614, |
|
"learning_rate": 4.7444362953628654e-05, |
|
"loss": 1.7837, |
|
"step": 5735 |
|
}, |
|
{ |
|
"epoch": 0.14595897909718575, |
|
"grad_norm": 1.6781623363494873, |
|
"learning_rate": 4.7407245876768424e-05, |
|
"loss": 1.7381, |
|
"step": 5766 |
|
}, |
|
{ |
|
"epoch": 0.14674370479125665, |
|
"grad_norm": 1.6126357316970825, |
|
"learning_rate": 4.736987593718397e-05, |
|
"loss": 1.7714, |
|
"step": 5797 |
|
}, |
|
{ |
|
"epoch": 0.14752843048532752, |
|
"grad_norm": 1.6623587608337402, |
|
"learning_rate": 4.733225355658999e-05, |
|
"loss": 1.7625, |
|
"step": 5828 |
|
}, |
|
{ |
|
"epoch": 0.14831315617939841, |
|
"grad_norm": 1.6715524196624756, |
|
"learning_rate": 4.7294379159549926e-05, |
|
"loss": 1.7631, |
|
"step": 5859 |
|
}, |
|
{ |
|
"epoch": 0.1490978818734693, |
|
"grad_norm": 1.6739026308059692, |
|
"learning_rate": 4.725625317347119e-05, |
|
"loss": 1.775, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 0.1498826075675402, |
|
"grad_norm": 1.8141075372695923, |
|
"learning_rate": 4.7217876028600374e-05, |
|
"loss": 1.7881, |
|
"step": 5921 |
|
}, |
|
{ |
|
"epoch": 0.15066733326161108, |
|
"grad_norm": 1.6842069625854492, |
|
"learning_rate": 4.717924815801832e-05, |
|
"loss": 1.7707, |
|
"step": 5952 |
|
}, |
|
{ |
|
"epoch": 0.15145205895568198, |
|
"grad_norm": 1.7032698392868042, |
|
"learning_rate": 4.714036999763532e-05, |
|
"loss": 1.7631, |
|
"step": 5983 |
|
}, |
|
{ |
|
"epoch": 0.15223678464975288, |
|
"grad_norm": 1.7856013774871826, |
|
"learning_rate": 4.7101241986186116e-05, |
|
"loss": 1.7545, |
|
"step": 6014 |
|
}, |
|
{ |
|
"epoch": 0.15302151034382377, |
|
"grad_norm": 1.679623007774353, |
|
"learning_rate": 4.7061864565225e-05, |
|
"loss": 1.7676, |
|
"step": 6045 |
|
}, |
|
{ |
|
"epoch": 0.15380623603789464, |
|
"grad_norm": 1.626792073249817, |
|
"learning_rate": 4.702223817912081e-05, |
|
"loss": 1.7434, |
|
"step": 6076 |
|
}, |
|
{ |
|
"epoch": 0.15459096173196554, |
|
"grad_norm": 1.850042700767517, |
|
"learning_rate": 4.698236327505195e-05, |
|
"loss": 1.7805, |
|
"step": 6107 |
|
}, |
|
{ |
|
"epoch": 0.15537568742603644, |
|
"grad_norm": 1.6403062343597412, |
|
"learning_rate": 4.694224030300127e-05, |
|
"loss": 1.7495, |
|
"step": 6138 |
|
}, |
|
{ |
|
"epoch": 0.15616041312010734, |
|
"grad_norm": 1.5897477865219116, |
|
"learning_rate": 4.690186971575107e-05, |
|
"loss": 1.779, |
|
"step": 6169 |
|
}, |
|
{ |
|
"epoch": 0.1569451388141782, |
|
"grad_norm": 1.8173433542251587, |
|
"learning_rate": 4.6861251968877916e-05, |
|
"loss": 1.7705, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.1577298645082491, |
|
"grad_norm": 1.788022756576538, |
|
"learning_rate": 4.68203875207476e-05, |
|
"loss": 1.7457, |
|
"step": 6231 |
|
}, |
|
{ |
|
"epoch": 0.15851459020232, |
|
"grad_norm": 1.6219838857650757, |
|
"learning_rate": 4.677927683250983e-05, |
|
"loss": 1.7758, |
|
"step": 6262 |
|
}, |
|
{ |
|
"epoch": 0.1592993158963909, |
|
"grad_norm": 1.678890347480774, |
|
"learning_rate": 4.6737920368093156e-05, |
|
"loss": 1.7394, |
|
"step": 6293 |
|
}, |
|
{ |
|
"epoch": 0.16008404159046177, |
|
"grad_norm": 1.5719743967056274, |
|
"learning_rate": 4.669631859419965e-05, |
|
"loss": 1.7549, |
|
"step": 6324 |
|
}, |
|
{ |
|
"epoch": 0.16086876728453267, |
|
"grad_norm": 1.6332769393920898, |
|
"learning_rate": 4.6654471980299676e-05, |
|
"loss": 1.7462, |
|
"step": 6355 |
|
}, |
|
{ |
|
"epoch": 0.16165349297860357, |
|
"grad_norm": 1.6942561864852905, |
|
"learning_rate": 4.661238099862658e-05, |
|
"loss": 1.7506, |
|
"step": 6386 |
|
}, |
|
{ |
|
"epoch": 0.16243821867267447, |
|
"grad_norm": 1.8173885345458984, |
|
"learning_rate": 4.657004612417138e-05, |
|
"loss": 1.7455, |
|
"step": 6417 |
|
}, |
|
{ |
|
"epoch": 0.16322294436674534, |
|
"grad_norm": 1.6209042072296143, |
|
"learning_rate": 4.6527467834677374e-05, |
|
"loss": 1.7413, |
|
"step": 6448 |
|
}, |
|
{ |
|
"epoch": 0.16400767006081624, |
|
"grad_norm": 1.5801094770431519, |
|
"learning_rate": 4.648464661063478e-05, |
|
"loss": 1.7491, |
|
"step": 6479 |
|
}, |
|
{ |
|
"epoch": 0.16479239575488713, |
|
"grad_norm": 1.5499264001846313, |
|
"learning_rate": 4.6441582935275264e-05, |
|
"loss": 1.7276, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 0.16557712144895803, |
|
"grad_norm": 1.6154171228408813, |
|
"learning_rate": 4.6398277294566586e-05, |
|
"loss": 1.7816, |
|
"step": 6541 |
|
}, |
|
{ |
|
"epoch": 0.1663618471430289, |
|
"grad_norm": 1.5633410215377808, |
|
"learning_rate": 4.6354730177207e-05, |
|
"loss": 1.7447, |
|
"step": 6572 |
|
}, |
|
{ |
|
"epoch": 0.1671465728370998, |
|
"grad_norm": 1.7070655822753906, |
|
"learning_rate": 4.6310942074619787e-05, |
|
"loss": 1.7477, |
|
"step": 6603 |
|
}, |
|
{ |
|
"epoch": 0.1679312985311707, |
|
"grad_norm": 1.7502373456954956, |
|
"learning_rate": 4.626691348094777e-05, |
|
"loss": 1.74, |
|
"step": 6634 |
|
}, |
|
{ |
|
"epoch": 0.1687160242252416, |
|
"grad_norm": 1.9541263580322266, |
|
"learning_rate": 4.622264489304762e-05, |
|
"loss": 1.7389, |
|
"step": 6665 |
|
}, |
|
{ |
|
"epoch": 0.16950074991931247, |
|
"grad_norm": 1.64599609375, |
|
"learning_rate": 4.617813681048434e-05, |
|
"loss": 1.7445, |
|
"step": 6696 |
|
}, |
|
{ |
|
"epoch": 0.17028547561338336, |
|
"grad_norm": 1.9360859394073486, |
|
"learning_rate": 4.61333897355256e-05, |
|
"loss": 1.73, |
|
"step": 6727 |
|
}, |
|
{ |
|
"epoch": 0.17107020130745426, |
|
"grad_norm": 1.693892240524292, |
|
"learning_rate": 4.608840417313604e-05, |
|
"loss": 1.7229, |
|
"step": 6758 |
|
}, |
|
{ |
|
"epoch": 0.17185492700152516, |
|
"grad_norm": 1.6243150234222412, |
|
"learning_rate": 4.6043180630971646e-05, |
|
"loss": 1.7421, |
|
"step": 6789 |
|
}, |
|
{ |
|
"epoch": 0.17263965269559603, |
|
"grad_norm": 1.5926107168197632, |
|
"learning_rate": 4.599771961937391e-05, |
|
"loss": 1.7447, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 0.17342437838966693, |
|
"grad_norm": 1.695167064666748, |
|
"learning_rate": 4.5952021651364204e-05, |
|
"loss": 1.7463, |
|
"step": 6851 |
|
}, |
|
{ |
|
"epoch": 0.17420910408373783, |
|
"grad_norm": 1.5915182828903198, |
|
"learning_rate": 4.590608724263786e-05, |
|
"loss": 1.7198, |
|
"step": 6882 |
|
}, |
|
{ |
|
"epoch": 0.17499382977780872, |
|
"grad_norm": 1.6135920286178589, |
|
"learning_rate": 4.585991691155845e-05, |
|
"loss": 1.7233, |
|
"step": 6913 |
|
}, |
|
{ |
|
"epoch": 0.17577855547187962, |
|
"grad_norm": 1.5855350494384766, |
|
"learning_rate": 4.581351117915188e-05, |
|
"loss": 1.7519, |
|
"step": 6944 |
|
}, |
|
{ |
|
"epoch": 0.1765632811659505, |
|
"grad_norm": 1.5782060623168945, |
|
"learning_rate": 4.5766870569100534e-05, |
|
"loss": 1.729, |
|
"step": 6975 |
|
}, |
|
{ |
|
"epoch": 0.1773480068600214, |
|
"grad_norm": 1.4931174516677856, |
|
"learning_rate": 4.571999560773736e-05, |
|
"loss": 1.7197, |
|
"step": 7006 |
|
}, |
|
{ |
|
"epoch": 0.1781327325540923, |
|
"grad_norm": 1.809645414352417, |
|
"learning_rate": 4.5672886824039915e-05, |
|
"loss": 1.7409, |
|
"step": 7037 |
|
}, |
|
{ |
|
"epoch": 0.17891745824816319, |
|
"grad_norm": 1.544233798980713, |
|
"learning_rate": 4.5625544749624435e-05, |
|
"loss": 1.7331, |
|
"step": 7068 |
|
}, |
|
{ |
|
"epoch": 0.17970218394223406, |
|
"grad_norm": 1.5316941738128662, |
|
"learning_rate": 4.5577969918739794e-05, |
|
"loss": 1.7245, |
|
"step": 7099 |
|
}, |
|
{ |
|
"epoch": 0.18048690963630495, |
|
"grad_norm": 1.4646427631378174, |
|
"learning_rate": 4.5530162868261486e-05, |
|
"loss": 1.7341, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 0.18127163533037585, |
|
"grad_norm": 1.6266372203826904, |
|
"learning_rate": 4.548212413768558e-05, |
|
"loss": 1.7311, |
|
"step": 7161 |
|
}, |
|
{ |
|
"epoch": 0.18205636102444675, |
|
"grad_norm": 1.6372709274291992, |
|
"learning_rate": 4.543385426912261e-05, |
|
"loss": 1.7344, |
|
"step": 7192 |
|
}, |
|
{ |
|
"epoch": 0.18284108671851762, |
|
"grad_norm": 1.642005443572998, |
|
"learning_rate": 4.53853538072915e-05, |
|
"loss": 1.7472, |
|
"step": 7223 |
|
}, |
|
{ |
|
"epoch": 0.18362581241258852, |
|
"grad_norm": 1.7344322204589844, |
|
"learning_rate": 4.533662329951336e-05, |
|
"loss": 1.7379, |
|
"step": 7254 |
|
}, |
|
{ |
|
"epoch": 0.18441053810665942, |
|
"grad_norm": 1.6593672037124634, |
|
"learning_rate": 4.528766329570536e-05, |
|
"loss": 1.7363, |
|
"step": 7285 |
|
}, |
|
{ |
|
"epoch": 0.18519526380073031, |
|
"grad_norm": 1.590846300125122, |
|
"learning_rate": 4.523847434837447e-05, |
|
"loss": 1.7432, |
|
"step": 7316 |
|
}, |
|
{ |
|
"epoch": 0.18597998949480118, |
|
"grad_norm": 1.6701788902282715, |
|
"learning_rate": 4.518905701261128e-05, |
|
"loss": 1.7287, |
|
"step": 7347 |
|
}, |
|
{ |
|
"epoch": 0.18676471518887208, |
|
"grad_norm": 1.6129958629608154, |
|
"learning_rate": 4.5139411846083715e-05, |
|
"loss": 1.7252, |
|
"step": 7378 |
|
}, |
|
{ |
|
"epoch": 0.18754944088294298, |
|
"grad_norm": 1.5602383613586426, |
|
"learning_rate": 4.508953940903073e-05, |
|
"loss": 1.7365, |
|
"step": 7409 |
|
}, |
|
{ |
|
"epoch": 0.18833416657701388, |
|
"grad_norm": 1.60308039188385, |
|
"learning_rate": 4.5039440264255994e-05, |
|
"loss": 1.7361, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 0.18911889227108475, |
|
"grad_norm": 1.588299036026001, |
|
"learning_rate": 4.498911497712155e-05, |
|
"loss": 1.7574, |
|
"step": 7471 |
|
}, |
|
{ |
|
"epoch": 0.18990361796515565, |
|
"grad_norm": 1.5599571466445923, |
|
"learning_rate": 4.493856411554142e-05, |
|
"loss": 1.738, |
|
"step": 7502 |
|
}, |
|
{ |
|
"epoch": 0.19068834365922654, |
|
"grad_norm": 1.5749436616897583, |
|
"learning_rate": 4.4887788249975206e-05, |
|
"loss": 1.7272, |
|
"step": 7533 |
|
}, |
|
{ |
|
"epoch": 0.19147306935329744, |
|
"grad_norm": 1.5536047220230103, |
|
"learning_rate": 4.4836787953421656e-05, |
|
"loss": 1.7249, |
|
"step": 7564 |
|
}, |
|
{ |
|
"epoch": 0.1922577950473683, |
|
"grad_norm": 1.5227411985397339, |
|
"learning_rate": 4.478556380141218e-05, |
|
"loss": 1.7137, |
|
"step": 7595 |
|
}, |
|
{ |
|
"epoch": 0.1930425207414392, |
|
"grad_norm": 1.5771219730377197, |
|
"learning_rate": 4.4734116372004375e-05, |
|
"loss": 1.7094, |
|
"step": 7626 |
|
}, |
|
{ |
|
"epoch": 0.1938272464355101, |
|
"grad_norm": 1.4533522129058838, |
|
"learning_rate": 4.4682446245775477e-05, |
|
"loss": 1.7493, |
|
"step": 7657 |
|
}, |
|
{ |
|
"epoch": 0.194611972129581, |
|
"grad_norm": 1.5640264749526978, |
|
"learning_rate": 4.463055400581586e-05, |
|
"loss": 1.7228, |
|
"step": 7688 |
|
}, |
|
{ |
|
"epoch": 0.19539669782365188, |
|
"grad_norm": 1.4606215953826904, |
|
"learning_rate": 4.4578440237722374e-05, |
|
"loss": 1.7414, |
|
"step": 7719 |
|
}, |
|
{ |
|
"epoch": 0.19618142351772277, |
|
"grad_norm": 1.5216374397277832, |
|
"learning_rate": 4.452610552959183e-05, |
|
"loss": 1.7155, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.19696614921179367, |
|
"grad_norm": 1.683119535446167, |
|
"learning_rate": 4.447355047201428e-05, |
|
"loss": 1.7346, |
|
"step": 7781 |
|
}, |
|
{ |
|
"epoch": 0.19775087490586457, |
|
"grad_norm": 1.6055350303649902, |
|
"learning_rate": 4.4420775658066414e-05, |
|
"loss": 1.7112, |
|
"step": 7812 |
|
}, |
|
{ |
|
"epoch": 0.19853560059993544, |
|
"grad_norm": 1.514739751815796, |
|
"learning_rate": 4.436778168330484e-05, |
|
"loss": 1.7274, |
|
"step": 7843 |
|
}, |
|
{ |
|
"epoch": 0.19932032629400634, |
|
"grad_norm": 2.131218433380127, |
|
"learning_rate": 4.4314569145759353e-05, |
|
"loss": 1.7127, |
|
"step": 7874 |
|
}, |
|
{ |
|
"epoch": 0.20010505198807724, |
|
"grad_norm": 1.4867665767669678, |
|
"learning_rate": 4.42611386459262e-05, |
|
"loss": 1.7245, |
|
"step": 7905 |
|
}, |
|
{ |
|
"epoch": 0.20088977768214814, |
|
"grad_norm": 1.6395418643951416, |
|
"learning_rate": 4.420749078676133e-05, |
|
"loss": 1.7146, |
|
"step": 7936 |
|
}, |
|
{ |
|
"epoch": 0.201674503376219, |
|
"grad_norm": 1.629939079284668, |
|
"learning_rate": 4.4153626173673516e-05, |
|
"loss": 1.7153, |
|
"step": 7967 |
|
}, |
|
{ |
|
"epoch": 0.2024592290702899, |
|
"grad_norm": 1.5973584651947021, |
|
"learning_rate": 4.409954541451762e-05, |
|
"loss": 1.7102, |
|
"step": 7998 |
|
}, |
|
{ |
|
"epoch": 0.2032439547643608, |
|
"grad_norm": 1.4822708368301392, |
|
"learning_rate": 4.404524911958764e-05, |
|
"loss": 1.7046, |
|
"step": 8029 |
|
}, |
|
{ |
|
"epoch": 0.2040286804584317, |
|
"grad_norm": 1.4706634283065796, |
|
"learning_rate": 4.399073790160989e-05, |
|
"loss": 1.7022, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 0.20481340615250257, |
|
"grad_norm": 1.5917459726333618, |
|
"learning_rate": 4.393601237573607e-05, |
|
"loss": 1.6983, |
|
"step": 8091 |
|
}, |
|
{ |
|
"epoch": 0.20559813184657347, |
|
"grad_norm": 1.7328417301177979, |
|
"learning_rate": 4.388107315953628e-05, |
|
"loss": 1.7164, |
|
"step": 8122 |
|
}, |
|
{ |
|
"epoch": 0.20638285754064437, |
|
"grad_norm": 1.6152797937393188, |
|
"learning_rate": 4.382592087299212e-05, |
|
"loss": 1.7302, |
|
"step": 8153 |
|
}, |
|
{ |
|
"epoch": 0.20716758323471526, |
|
"grad_norm": 1.7153429985046387, |
|
"learning_rate": 4.377055613848964e-05, |
|
"loss": 1.7278, |
|
"step": 8184 |
|
}, |
|
{ |
|
"epoch": 0.20795230892878613, |
|
"grad_norm": 1.7167855501174927, |
|
"learning_rate": 4.3714979580812355e-05, |
|
"loss": 1.7021, |
|
"step": 8215 |
|
}, |
|
{ |
|
"epoch": 0.20873703462285703, |
|
"grad_norm": 1.458811640739441, |
|
"learning_rate": 4.365919182713416e-05, |
|
"loss": 1.7099, |
|
"step": 8246 |
|
}, |
|
{ |
|
"epoch": 0.20952176031692793, |
|
"grad_norm": 5.516291618347168, |
|
"learning_rate": 4.360319350701226e-05, |
|
"loss": 1.7069, |
|
"step": 8277 |
|
}, |
|
{ |
|
"epoch": 0.21030648601099883, |
|
"grad_norm": 1.5669766664505005, |
|
"learning_rate": 4.3546985252380115e-05, |
|
"loss": 1.6983, |
|
"step": 8308 |
|
}, |
|
{ |
|
"epoch": 0.2110912117050697, |
|
"grad_norm": 1.4598067998886108, |
|
"learning_rate": 4.349056769754021e-05, |
|
"loss": 1.7265, |
|
"step": 8339 |
|
}, |
|
{ |
|
"epoch": 0.2118759373991406, |
|
"grad_norm": 1.5436547994613647, |
|
"learning_rate": 4.3433941479156994e-05, |
|
"loss": 1.7128, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 0.2126606630932115, |
|
"grad_norm": 1.6275660991668701, |
|
"learning_rate": 4.3377107236249647e-05, |
|
"loss": 1.7229, |
|
"step": 8401 |
|
}, |
|
{ |
|
"epoch": 0.2134453887872824, |
|
"grad_norm": 1.6207513809204102, |
|
"learning_rate": 4.332006561018488e-05, |
|
"loss": 1.702, |
|
"step": 8432 |
|
}, |
|
{ |
|
"epoch": 0.21423011448135326, |
|
"grad_norm": 1.6795597076416016, |
|
"learning_rate": 4.3262817244669683e-05, |
|
"loss": 1.6808, |
|
"step": 8463 |
|
}, |
|
{ |
|
"epoch": 0.21501484017542416, |
|
"grad_norm": 1.660192608833313, |
|
"learning_rate": 4.3205362785744083e-05, |
|
"loss": 1.7071, |
|
"step": 8494 |
|
}, |
|
{ |
|
"epoch": 0.21579956586949506, |
|
"grad_norm": 1.6086353063583374, |
|
"learning_rate": 4.314770288177384e-05, |
|
"loss": 1.7083, |
|
"step": 8525 |
|
}, |
|
{ |
|
"epoch": 0.21658429156356596, |
|
"grad_norm": 1.475216269493103, |
|
"learning_rate": 4.308983818344313e-05, |
|
"loss": 1.7234, |
|
"step": 8556 |
|
}, |
|
{ |
|
"epoch": 0.21736901725763683, |
|
"grad_norm": 1.7111340761184692, |
|
"learning_rate": 4.3031769343747206e-05, |
|
"loss": 1.6872, |
|
"step": 8587 |
|
}, |
|
{ |
|
"epoch": 0.21815374295170772, |
|
"grad_norm": 1.4544799327850342, |
|
"learning_rate": 4.297349701798505e-05, |
|
"loss": 1.692, |
|
"step": 8618 |
|
}, |
|
{ |
|
"epoch": 0.21893846864577862, |
|
"grad_norm": 1.6593588590621948, |
|
"learning_rate": 4.2915021863751916e-05, |
|
"loss": 1.6886, |
|
"step": 8649 |
|
}, |
|
{ |
|
"epoch": 0.21972319433984952, |
|
"grad_norm": 1.641408085823059, |
|
"learning_rate": 4.285634454093198e-05, |
|
"loss": 1.6872, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 0.2205079200339204, |
|
"grad_norm": 1.6036972999572754, |
|
"learning_rate": 4.279746571169086e-05, |
|
"loss": 1.7055, |
|
"step": 8711 |
|
}, |
|
{ |
|
"epoch": 0.2212926457279913, |
|
"grad_norm": 1.4984327554702759, |
|
"learning_rate": 4.2738386040468136e-05, |
|
"loss": 1.6997, |
|
"step": 8742 |
|
}, |
|
{ |
|
"epoch": 0.2220773714220622, |
|
"grad_norm": 1.471111536026001, |
|
"learning_rate": 4.2679106193969866e-05, |
|
"loss": 1.6926, |
|
"step": 8773 |
|
}, |
|
{ |
|
"epoch": 0.22286209711613308, |
|
"grad_norm": 1.521364688873291, |
|
"learning_rate": 4.261962684116106e-05, |
|
"loss": 1.6851, |
|
"step": 8804 |
|
}, |
|
{ |
|
"epoch": 0.22364682281020395, |
|
"grad_norm": 1.6068321466445923, |
|
"learning_rate": 4.2559948653258145e-05, |
|
"loss": 1.7113, |
|
"step": 8835 |
|
}, |
|
{ |
|
"epoch": 0.22443154850427485, |
|
"grad_norm": 1.453379511833191, |
|
"learning_rate": 4.250007230372134e-05, |
|
"loss": 1.7025, |
|
"step": 8866 |
|
}, |
|
{ |
|
"epoch": 0.22521627419834575, |
|
"grad_norm": 1.5845959186553955, |
|
"learning_rate": 4.2439998468247126e-05, |
|
"loss": 1.6978, |
|
"step": 8897 |
|
}, |
|
{ |
|
"epoch": 0.22600099989241665, |
|
"grad_norm": 1.5308622121810913, |
|
"learning_rate": 4.2379727824760566e-05, |
|
"loss": 1.6956, |
|
"step": 8928 |
|
}, |
|
{ |
|
"epoch": 0.22678572558648752, |
|
"grad_norm": 1.6339962482452393, |
|
"learning_rate": 4.231926105340768e-05, |
|
"loss": 1.6831, |
|
"step": 8959 |
|
}, |
|
{ |
|
"epoch": 0.22757045128055842, |
|
"grad_norm": 1.4533487558364868, |
|
"learning_rate": 4.225859883654776e-05, |
|
"loss": 1.7025, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 0.22835517697462931, |
|
"grad_norm": 3.971897840499878, |
|
"learning_rate": 4.219774185874569e-05, |
|
"loss": 1.689, |
|
"step": 9021 |
|
}, |
|
{ |
|
"epoch": 0.2291399026687002, |
|
"grad_norm": 1.4394114017486572, |
|
"learning_rate": 4.213669080676418e-05, |
|
"loss": 1.6841, |
|
"step": 9052 |
|
}, |
|
{ |
|
"epoch": 0.22992462836277108, |
|
"grad_norm": 1.821142315864563, |
|
"learning_rate": 4.2075446369556056e-05, |
|
"loss": 1.6883, |
|
"step": 9083 |
|
}, |
|
{ |
|
"epoch": 0.23070935405684198, |
|
"grad_norm": 1.6653649806976318, |
|
"learning_rate": 4.201400923825648e-05, |
|
"loss": 1.7011, |
|
"step": 9114 |
|
}, |
|
{ |
|
"epoch": 0.23149407975091288, |
|
"grad_norm": 1.5895901918411255, |
|
"learning_rate": 4.195238010617511e-05, |
|
"loss": 1.7004, |
|
"step": 9145 |
|
}, |
|
{ |
|
"epoch": 0.23227880544498378, |
|
"grad_norm": 1.4648844003677368, |
|
"learning_rate": 4.1890559668788344e-05, |
|
"loss": 1.6872, |
|
"step": 9176 |
|
}, |
|
{ |
|
"epoch": 0.23306353113905465, |
|
"grad_norm": 1.5886753797531128, |
|
"learning_rate": 4.1828548623731405e-05, |
|
"loss": 1.6851, |
|
"step": 9207 |
|
}, |
|
{ |
|
"epoch": 0.23384825683312555, |
|
"grad_norm": 1.4713412523269653, |
|
"learning_rate": 4.1766347670790506e-05, |
|
"loss": 1.6818, |
|
"step": 9238 |
|
}, |
|
{ |
|
"epoch": 0.23463298252719644, |
|
"grad_norm": 1.5660710334777832, |
|
"learning_rate": 4.170395751189495e-05, |
|
"loss": 1.6844, |
|
"step": 9269 |
|
}, |
|
{ |
|
"epoch": 0.23541770822126734, |
|
"grad_norm": 1.7024312019348145, |
|
"learning_rate": 4.164137885110921e-05, |
|
"loss": 1.6839, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.2362024339153382, |
|
"grad_norm": 1.5936214923858643, |
|
"learning_rate": 4.157861239462495e-05, |
|
"loss": 1.6953, |
|
"step": 9331 |
|
}, |
|
{ |
|
"epoch": 0.2369871596094091, |
|
"grad_norm": 1.4709779024124146, |
|
"learning_rate": 4.1515658850753114e-05, |
|
"loss": 1.6806, |
|
"step": 9362 |
|
}, |
|
{ |
|
"epoch": 0.23777188530348, |
|
"grad_norm": 1.4303510189056396, |
|
"learning_rate": 4.145251892991588e-05, |
|
"loss": 1.6792, |
|
"step": 9393 |
|
}, |
|
{ |
|
"epoch": 0.2385566109975509, |
|
"grad_norm": 1.5452120304107666, |
|
"learning_rate": 4.138919334463868e-05, |
|
"loss": 1.6712, |
|
"step": 9424 |
|
}, |
|
{ |
|
"epoch": 0.23934133669162178, |
|
"grad_norm": 1.4944697618484497, |
|
"learning_rate": 4.1325682809542124e-05, |
|
"loss": 1.6777, |
|
"step": 9455 |
|
}, |
|
{ |
|
"epoch": 0.24012606238569267, |
|
"grad_norm": 1.6359312534332275, |
|
"learning_rate": 4.126198804133398e-05, |
|
"loss": 1.6782, |
|
"step": 9486 |
|
}, |
|
{ |
|
"epoch": 0.24091078807976357, |
|
"grad_norm": 1.3874454498291016, |
|
"learning_rate": 4.1198109758801055e-05, |
|
"loss": 1.6805, |
|
"step": 9517 |
|
}, |
|
{ |
|
"epoch": 0.24169551377383447, |
|
"grad_norm": 1.4747340679168701, |
|
"learning_rate": 4.113404868280107e-05, |
|
"loss": 1.6704, |
|
"step": 9548 |
|
}, |
|
{ |
|
"epoch": 0.24248023946790534, |
|
"grad_norm": 1.95576012134552, |
|
"learning_rate": 4.106980553625457e-05, |
|
"loss": 1.7008, |
|
"step": 9579 |
|
}, |
|
{ |
|
"epoch": 0.24326496516197624, |
|
"grad_norm": 1.454005479812622, |
|
"learning_rate": 4.100538104413674e-05, |
|
"loss": 1.6771, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 0.24404969085604714, |
|
"grad_norm": 1.5640463829040527, |
|
"learning_rate": 4.09407759334692e-05, |
|
"loss": 1.6763, |
|
"step": 9641 |
|
}, |
|
{ |
|
"epoch": 0.24483441655011803, |
|
"grad_norm": 1.5076780319213867, |
|
"learning_rate": 4.087599093331186e-05, |
|
"loss": 1.6977, |
|
"step": 9672 |
|
}, |
|
{ |
|
"epoch": 0.2456191422441889, |
|
"grad_norm": 1.5072520971298218, |
|
"learning_rate": 4.081102677475462e-05, |
|
"loss": 1.6749, |
|
"step": 9703 |
|
}, |
|
{ |
|
"epoch": 0.2464038679382598, |
|
"grad_norm": 1.6311815977096558, |
|
"learning_rate": 4.0745884190909194e-05, |
|
"loss": 1.684, |
|
"step": 9734 |
|
}, |
|
{ |
|
"epoch": 0.2471885936323307, |
|
"grad_norm": 1.5691202878952026, |
|
"learning_rate": 4.0680563916900796e-05, |
|
"loss": 1.6804, |
|
"step": 9765 |
|
}, |
|
{ |
|
"epoch": 0.2479733193264016, |
|
"grad_norm": 1.4325530529022217, |
|
"learning_rate": 4.0615066689859815e-05, |
|
"loss": 1.719, |
|
"step": 9796 |
|
}, |
|
{ |
|
"epoch": 0.24875804502047247, |
|
"grad_norm": 1.439177393913269, |
|
"learning_rate": 4.0549393248913584e-05, |
|
"loss": 1.6873, |
|
"step": 9827 |
|
}, |
|
{ |
|
"epoch": 0.24954277071454337, |
|
"grad_norm": 1.4155471324920654, |
|
"learning_rate": 4.048354433517794e-05, |
|
"loss": 1.692, |
|
"step": 9858 |
|
}, |
|
{ |
|
"epoch": 0.25032749640861424, |
|
"grad_norm": 1.5917115211486816, |
|
"learning_rate": 4.0417520691748916e-05, |
|
"loss": 1.6752, |
|
"step": 9889 |
|
}, |
|
{ |
|
"epoch": 0.25111222210268513, |
|
"grad_norm": 1.649154543876648, |
|
"learning_rate": 4.035132306369438e-05, |
|
"loss": 1.6603, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 0.25189694779675603, |
|
"grad_norm": 1.5114792585372925, |
|
"learning_rate": 4.028495219804555e-05, |
|
"loss": 1.7005, |
|
"step": 9951 |
|
}, |
|
{ |
|
"epoch": 0.25268167349082693, |
|
"grad_norm": 16.910812377929688, |
|
"learning_rate": 4.021840884378864e-05, |
|
"loss": 1.6846, |
|
"step": 9982 |
|
}, |
|
{ |
|
"epoch": 0.25346639918489783, |
|
"grad_norm": 1.4342628717422485, |
|
"learning_rate": 4.015169375185633e-05, |
|
"loss": 1.6678, |
|
"step": 10013 |
|
}, |
|
{ |
|
"epoch": 0.2542511248789687, |
|
"grad_norm": 1.4815376996994019, |
|
"learning_rate": 4.0084807675119396e-05, |
|
"loss": 1.671, |
|
"step": 10044 |
|
}, |
|
{ |
|
"epoch": 0.2550358505730396, |
|
"grad_norm": 1.4633368253707886, |
|
"learning_rate": 4.0017751368378106e-05, |
|
"loss": 1.6824, |
|
"step": 10075 |
|
}, |
|
{ |
|
"epoch": 0.2558205762671105, |
|
"grad_norm": 1.3904149532318115, |
|
"learning_rate": 3.995052558835377e-05, |
|
"loss": 1.6775, |
|
"step": 10106 |
|
}, |
|
{ |
|
"epoch": 0.25660530196118136, |
|
"grad_norm": 1.5234646797180176, |
|
"learning_rate": 3.988313109368017e-05, |
|
"loss": 1.6854, |
|
"step": 10137 |
|
}, |
|
{ |
|
"epoch": 0.25739002765525226, |
|
"grad_norm": 1.4530494213104248, |
|
"learning_rate": 3.981556864489504e-05, |
|
"loss": 1.6727, |
|
"step": 10168 |
|
}, |
|
{ |
|
"epoch": 0.25817475334932316, |
|
"grad_norm": 1.5600273609161377, |
|
"learning_rate": 3.974783900443142e-05, |
|
"loss": 1.6645, |
|
"step": 10199 |
|
}, |
|
{ |
|
"epoch": 0.25895947904339406, |
|
"grad_norm": 1.4213160276412964, |
|
"learning_rate": 3.9679942936609095e-05, |
|
"loss": 1.6898, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 0.25974420473746496, |
|
"grad_norm": 1.5741041898727417, |
|
"learning_rate": 3.961188120762596e-05, |
|
"loss": 1.693, |
|
"step": 10261 |
|
}, |
|
{ |
|
"epoch": 0.26052893043153585, |
|
"grad_norm": 1.564493179321289, |
|
"learning_rate": 3.954365458554938e-05, |
|
"loss": 1.6836, |
|
"step": 10292 |
|
}, |
|
{ |
|
"epoch": 0.26131365612560675, |
|
"grad_norm": 1.5584787130355835, |
|
"learning_rate": 3.947526384030751e-05, |
|
"loss": 1.6852, |
|
"step": 10323 |
|
}, |
|
{ |
|
"epoch": 0.26209838181967765, |
|
"grad_norm": 1.4936350584030151, |
|
"learning_rate": 3.9406709743680624e-05, |
|
"loss": 1.6777, |
|
"step": 10354 |
|
}, |
|
{ |
|
"epoch": 0.26288310751374855, |
|
"grad_norm": 1.504725694656372, |
|
"learning_rate": 3.9337993069292366e-05, |
|
"loss": 1.6765, |
|
"step": 10385 |
|
}, |
|
{ |
|
"epoch": 0.2636678332078194, |
|
"grad_norm": 1.4809914827346802, |
|
"learning_rate": 3.926911459260109e-05, |
|
"loss": 1.6578, |
|
"step": 10416 |
|
}, |
|
{ |
|
"epoch": 0.2644525589018903, |
|
"grad_norm": 1.529976725578308, |
|
"learning_rate": 3.920007509089102e-05, |
|
"loss": 1.6709, |
|
"step": 10447 |
|
}, |
|
{ |
|
"epoch": 0.2652372845959612, |
|
"grad_norm": 1.483694076538086, |
|
"learning_rate": 3.913087534326357e-05, |
|
"loss": 1.6713, |
|
"step": 10478 |
|
}, |
|
{ |
|
"epoch": 0.2660220102900321, |
|
"grad_norm": 1.4282972812652588, |
|
"learning_rate": 3.9061516130628475e-05, |
|
"loss": 1.6784, |
|
"step": 10509 |
|
}, |
|
{ |
|
"epoch": 0.266806735984103, |
|
"grad_norm": 1.5122032165527344, |
|
"learning_rate": 3.8991998235695025e-05, |
|
"loss": 1.6603, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 0.2675914616781739, |
|
"grad_norm": 1.5154742002487183, |
|
"learning_rate": 3.8922322442963224e-05, |
|
"loss": 1.6831, |
|
"step": 10571 |
|
}, |
|
{ |
|
"epoch": 0.2683761873722448, |
|
"grad_norm": 1.4630860090255737, |
|
"learning_rate": 3.885248953871491e-05, |
|
"loss": 1.6715, |
|
"step": 10602 |
|
}, |
|
{ |
|
"epoch": 0.2691609130663157, |
|
"grad_norm": 1.4164702892303467, |
|
"learning_rate": 3.8782500311004915e-05, |
|
"loss": 1.6654, |
|
"step": 10633 |
|
}, |
|
{ |
|
"epoch": 0.2699456387603865, |
|
"grad_norm": 1.5865578651428223, |
|
"learning_rate": 3.871235554965218e-05, |
|
"loss": 1.6829, |
|
"step": 10664 |
|
}, |
|
{ |
|
"epoch": 0.2707303644544574, |
|
"grad_norm": 1.4984766244888306, |
|
"learning_rate": 3.864205604623078e-05, |
|
"loss": 1.673, |
|
"step": 10695 |
|
}, |
|
{ |
|
"epoch": 0.2715150901485283, |
|
"grad_norm": 1.5477566719055176, |
|
"learning_rate": 3.857160259406107e-05, |
|
"loss": 1.6711, |
|
"step": 10726 |
|
}, |
|
{ |
|
"epoch": 0.2722998158425992, |
|
"grad_norm": 1.5356842279434204, |
|
"learning_rate": 3.8500995988200674e-05, |
|
"loss": 1.6556, |
|
"step": 10757 |
|
}, |
|
{ |
|
"epoch": 0.2730845415366701, |
|
"grad_norm": 1.413104772567749, |
|
"learning_rate": 3.843023702543556e-05, |
|
"loss": 1.658, |
|
"step": 10788 |
|
}, |
|
{ |
|
"epoch": 0.273869267230741, |
|
"grad_norm": 1.5174081325531006, |
|
"learning_rate": 3.8359326504270984e-05, |
|
"loss": 1.6672, |
|
"step": 10819 |
|
}, |
|
{ |
|
"epoch": 0.2746539929248119, |
|
"grad_norm": 1.4649910926818848, |
|
"learning_rate": 3.828826522492255e-05, |
|
"loss": 1.6625, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 0.2754387186188828, |
|
"grad_norm": 1.5240408182144165, |
|
"learning_rate": 3.821705398930713e-05, |
|
"loss": 1.6619, |
|
"step": 10881 |
|
}, |
|
{ |
|
"epoch": 0.27622344431295365, |
|
"grad_norm": 1.4349104166030884, |
|
"learning_rate": 3.814569360103385e-05, |
|
"loss": 1.6595, |
|
"step": 10912 |
|
}, |
|
{ |
|
"epoch": 0.27700817000702455, |
|
"grad_norm": 1.4311225414276123, |
|
"learning_rate": 3.807418486539499e-05, |
|
"loss": 1.6557, |
|
"step": 10943 |
|
}, |
|
{ |
|
"epoch": 0.27779289570109544, |
|
"grad_norm": 1.5817755460739136, |
|
"learning_rate": 3.80025285893569e-05, |
|
"loss": 1.6882, |
|
"step": 10974 |
|
}, |
|
{ |
|
"epoch": 0.27857762139516634, |
|
"grad_norm": 1.5182181596755981, |
|
"learning_rate": 3.793072558155093e-05, |
|
"loss": 1.6697, |
|
"step": 11005 |
|
}, |
|
{ |
|
"epoch": 0.27936234708923724, |
|
"grad_norm": 1.4836517572402954, |
|
"learning_rate": 3.785877665226426e-05, |
|
"loss": 1.6576, |
|
"step": 11036 |
|
}, |
|
{ |
|
"epoch": 0.28014707278330814, |
|
"grad_norm": 1.460788607597351, |
|
"learning_rate": 3.778668261343079e-05, |
|
"loss": 1.6607, |
|
"step": 11067 |
|
}, |
|
{ |
|
"epoch": 0.28093179847737904, |
|
"grad_norm": 1.4307125806808472, |
|
"learning_rate": 3.771444427862192e-05, |
|
"loss": 1.662, |
|
"step": 11098 |
|
}, |
|
{ |
|
"epoch": 0.28171652417144993, |
|
"grad_norm": 1.4999738931655884, |
|
"learning_rate": 3.7642062463037465e-05, |
|
"loss": 1.6406, |
|
"step": 11129 |
|
}, |
|
{ |
|
"epoch": 0.2825012498655208, |
|
"grad_norm": 1.4646129608154297, |
|
"learning_rate": 3.7569537983496373e-05, |
|
"loss": 1.6653, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 0.2832859755595917, |
|
"grad_norm": 1.4709292650222778, |
|
"learning_rate": 3.749687165842753e-05, |
|
"loss": 1.6704, |
|
"step": 11191 |
|
}, |
|
{ |
|
"epoch": 0.28407070125366257, |
|
"grad_norm": 1.494458556175232, |
|
"learning_rate": 3.7424064307860536e-05, |
|
"loss": 1.6534, |
|
"step": 11222 |
|
}, |
|
{ |
|
"epoch": 0.28485542694773347, |
|
"grad_norm": 1.4409736394882202, |
|
"learning_rate": 3.735111675341645e-05, |
|
"loss": 1.6645, |
|
"step": 11253 |
|
}, |
|
{ |
|
"epoch": 0.28564015264180437, |
|
"grad_norm": 1.4628338813781738, |
|
"learning_rate": 3.7278029818298524e-05, |
|
"loss": 1.6611, |
|
"step": 11284 |
|
}, |
|
{ |
|
"epoch": 0.28642487833587527, |
|
"grad_norm": 1.3659113645553589, |
|
"learning_rate": 3.720480432728287e-05, |
|
"loss": 1.6435, |
|
"step": 11315 |
|
}, |
|
{ |
|
"epoch": 0.28720960402994616, |
|
"grad_norm": 1.3704752922058105, |
|
"learning_rate": 3.71314411067092e-05, |
|
"loss": 1.6507, |
|
"step": 11346 |
|
}, |
|
{ |
|
"epoch": 0.28799432972401706, |
|
"grad_norm": 1.579837441444397, |
|
"learning_rate": 3.70579409844715e-05, |
|
"loss": 1.6716, |
|
"step": 11377 |
|
}, |
|
{ |
|
"epoch": 0.2887790554180879, |
|
"grad_norm": 1.5566996335983276, |
|
"learning_rate": 3.698430479000865e-05, |
|
"loss": 1.6439, |
|
"step": 11408 |
|
}, |
|
{ |
|
"epoch": 0.2895637811121588, |
|
"grad_norm": 1.4722687005996704, |
|
"learning_rate": 3.691053335429509e-05, |
|
"loss": 1.683, |
|
"step": 11439 |
|
}, |
|
{ |
|
"epoch": 0.2903485068062297, |
|
"grad_norm": 1.491283893585205, |
|
"learning_rate": 3.683662750983147e-05, |
|
"loss": 1.6606, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 0.2911332325003006, |
|
"grad_norm": 1.402040719985962, |
|
"learning_rate": 3.676258809063518e-05, |
|
"loss": 1.6582, |
|
"step": 11501 |
|
}, |
|
{ |
|
"epoch": 0.2919179581943715, |
|
"grad_norm": 1.4377038478851318, |
|
"learning_rate": 3.6688415932231004e-05, |
|
"loss": 1.6398, |
|
"step": 11532 |
|
}, |
|
{ |
|
"epoch": 0.2927026838884424, |
|
"grad_norm": 1.4151259660720825, |
|
"learning_rate": 3.661411187164166e-05, |
|
"loss": 1.6645, |
|
"step": 11563 |
|
}, |
|
{ |
|
"epoch": 0.2934874095825133, |
|
"grad_norm": 1.5219615697860718, |
|
"learning_rate": 3.65396767473784e-05, |
|
"loss": 1.6705, |
|
"step": 11594 |
|
}, |
|
{ |
|
"epoch": 0.2942721352765842, |
|
"grad_norm": 1.533252239227295, |
|
"learning_rate": 3.6465111399431465e-05, |
|
"loss": 1.6714, |
|
"step": 11625 |
|
}, |
|
{ |
|
"epoch": 0.29505686097065503, |
|
"grad_norm": 1.410959243774414, |
|
"learning_rate": 3.6390416669260674e-05, |
|
"loss": 1.6533, |
|
"step": 11656 |
|
}, |
|
{ |
|
"epoch": 0.29584158666472593, |
|
"grad_norm": 1.5377541780471802, |
|
"learning_rate": 3.63155933997859e-05, |
|
"loss": 1.6505, |
|
"step": 11687 |
|
}, |
|
{ |
|
"epoch": 0.29662631235879683, |
|
"grad_norm": 1.4504135847091675, |
|
"learning_rate": 3.624064243537758e-05, |
|
"loss": 1.6287, |
|
"step": 11718 |
|
}, |
|
{ |
|
"epoch": 0.2974110380528677, |
|
"grad_norm": 1.4606986045837402, |
|
"learning_rate": 3.616556462184716e-05, |
|
"loss": 1.6592, |
|
"step": 11749 |
|
}, |
|
{ |
|
"epoch": 0.2981957637469386, |
|
"grad_norm": 1.4440289735794067, |
|
"learning_rate": 3.609036080643755e-05, |
|
"loss": 1.6598, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 0.2989804894410095, |
|
"grad_norm": 1.5399249792099, |
|
"learning_rate": 3.60150318378136e-05, |
|
"loss": 1.6852, |
|
"step": 11811 |
|
}, |
|
{ |
|
"epoch": 0.2997652151350804, |
|
"grad_norm": 1.4778543710708618, |
|
"learning_rate": 3.5939578566052465e-05, |
|
"loss": 1.6462, |
|
"step": 11842 |
|
}, |
|
{ |
|
"epoch": 0.3005499408291513, |
|
"grad_norm": 1.4979726076126099, |
|
"learning_rate": 3.586400184263408e-05, |
|
"loss": 1.6576, |
|
"step": 11873 |
|
}, |
|
{ |
|
"epoch": 0.30133466652322216, |
|
"grad_norm": 1.4904232025146484, |
|
"learning_rate": 3.578830252043148e-05, |
|
"loss": 1.6476, |
|
"step": 11904 |
|
}, |
|
{ |
|
"epoch": 0.30211939221729306, |
|
"grad_norm": 1.5472886562347412, |
|
"learning_rate": 3.571248145370125e-05, |
|
"loss": 1.6721, |
|
"step": 11935 |
|
}, |
|
{ |
|
"epoch": 0.30290411791136396, |
|
"grad_norm": 1.4954209327697754, |
|
"learning_rate": 3.5636539498073794e-05, |
|
"loss": 1.6483, |
|
"step": 11966 |
|
}, |
|
{ |
|
"epoch": 0.30368884360543486, |
|
"grad_norm": 1.4504363536834717, |
|
"learning_rate": 3.556047751054378e-05, |
|
"loss": 1.657, |
|
"step": 11997 |
|
}, |
|
{ |
|
"epoch": 0.30447356929950575, |
|
"grad_norm": 1.3581033945083618, |
|
"learning_rate": 3.548429634946039e-05, |
|
"loss": 1.6579, |
|
"step": 12028 |
|
}, |
|
{ |
|
"epoch": 0.30525829499357665, |
|
"grad_norm": 1.4421014785766602, |
|
"learning_rate": 3.540799687451768e-05, |
|
"loss": 1.6496, |
|
"step": 12059 |
|
}, |
|
{ |
|
"epoch": 0.30604302068764755, |
|
"grad_norm": 1.523169994354248, |
|
"learning_rate": 3.533157994674485e-05, |
|
"loss": 1.6714, |
|
"step": 12090 |
|
}, |
|
{ |
|
"epoch": 0.30682774638171845, |
|
"grad_norm": 1.455269455909729, |
|
"learning_rate": 3.5255046428496546e-05, |
|
"loss": 1.6695, |
|
"step": 12121 |
|
}, |
|
{ |
|
"epoch": 0.3076124720757893, |
|
"grad_norm": 1.4330891370773315, |
|
"learning_rate": 3.517839718344311e-05, |
|
"loss": 1.6519, |
|
"step": 12152 |
|
}, |
|
{ |
|
"epoch": 0.3083971977698602, |
|
"grad_norm": 1.3913158178329468, |
|
"learning_rate": 3.510163307656086e-05, |
|
"loss": 1.6329, |
|
"step": 12183 |
|
}, |
|
{ |
|
"epoch": 0.3091819234639311, |
|
"grad_norm": 1.355193018913269, |
|
"learning_rate": 3.5024754974122324e-05, |
|
"loss": 1.624, |
|
"step": 12214 |
|
}, |
|
{ |
|
"epoch": 0.309966649158002, |
|
"grad_norm": 1.4055231809616089, |
|
"learning_rate": 3.494776374368643e-05, |
|
"loss": 1.6491, |
|
"step": 12245 |
|
}, |
|
{ |
|
"epoch": 0.3107513748520729, |
|
"grad_norm": 1.4227032661437988, |
|
"learning_rate": 3.4870660254088724e-05, |
|
"loss": 1.6274, |
|
"step": 12276 |
|
}, |
|
{ |
|
"epoch": 0.3115361005461438, |
|
"grad_norm": 1.4558427333831787, |
|
"learning_rate": 3.479344537543164e-05, |
|
"loss": 1.6419, |
|
"step": 12307 |
|
}, |
|
{ |
|
"epoch": 0.3123208262402147, |
|
"grad_norm": 1.5154629945755005, |
|
"learning_rate": 3.4716119979074565e-05, |
|
"loss": 1.6443, |
|
"step": 12338 |
|
}, |
|
{ |
|
"epoch": 0.3131055519342856, |
|
"grad_norm": 1.4458774328231812, |
|
"learning_rate": 3.463868493762412e-05, |
|
"loss": 1.6615, |
|
"step": 12369 |
|
}, |
|
{ |
|
"epoch": 0.3138902776283564, |
|
"grad_norm": 1.4116544723510742, |
|
"learning_rate": 3.456114112492418e-05, |
|
"loss": 1.6481, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.3146750033224273, |
|
"grad_norm": 1.8497071266174316, |
|
"learning_rate": 3.4483489416046164e-05, |
|
"loss": 1.6262, |
|
"step": 12431 |
|
}, |
|
{ |
|
"epoch": 0.3154597290164982, |
|
"grad_norm": 1.3854331970214844, |
|
"learning_rate": 3.440573068727905e-05, |
|
"loss": 1.6387, |
|
"step": 12462 |
|
}, |
|
{ |
|
"epoch": 0.3162444547105691, |
|
"grad_norm": 1.509178876876831, |
|
"learning_rate": 3.4327865816119495e-05, |
|
"loss": 1.6566, |
|
"step": 12493 |
|
}, |
|
{ |
|
"epoch": 0.31702918040464, |
|
"grad_norm": 1.3977612257003784, |
|
"learning_rate": 3.4249895681262025e-05, |
|
"loss": 1.6676, |
|
"step": 12524 |
|
}, |
|
{ |
|
"epoch": 0.3178139060987109, |
|
"grad_norm": 1.3736423254013062, |
|
"learning_rate": 3.417182116258899e-05, |
|
"loss": 1.6238, |
|
"step": 12555 |
|
}, |
|
{ |
|
"epoch": 0.3185986317927818, |
|
"grad_norm": 1.4226630926132202, |
|
"learning_rate": 3.409364314116074e-05, |
|
"loss": 1.6513, |
|
"step": 12586 |
|
}, |
|
{ |
|
"epoch": 0.3193833574868527, |
|
"grad_norm": 1.4804571866989136, |
|
"learning_rate": 3.401536249920559e-05, |
|
"loss": 1.6383, |
|
"step": 12617 |
|
}, |
|
{ |
|
"epoch": 0.32016808318092355, |
|
"grad_norm": 1.456168532371521, |
|
"learning_rate": 3.393698012010998e-05, |
|
"loss": 1.6621, |
|
"step": 12648 |
|
}, |
|
{ |
|
"epoch": 0.32095280887499444, |
|
"grad_norm": 1.3990952968597412, |
|
"learning_rate": 3.385849688840839e-05, |
|
"loss": 1.6376, |
|
"step": 12679 |
|
}, |
|
{ |
|
"epoch": 0.32173753456906534, |
|
"grad_norm": 1.3588812351226807, |
|
"learning_rate": 3.3779913689773414e-05, |
|
"loss": 1.656, |
|
"step": 12710 |
|
}, |
|
{ |
|
"epoch": 0.32252226026313624, |
|
"grad_norm": 1.4718931913375854, |
|
"learning_rate": 3.370123141100578e-05, |
|
"loss": 1.6255, |
|
"step": 12741 |
|
}, |
|
{ |
|
"epoch": 0.32330698595720714, |
|
"grad_norm": 1.3603503704071045, |
|
"learning_rate": 3.3622450940024305e-05, |
|
"loss": 1.6517, |
|
"step": 12772 |
|
}, |
|
{ |
|
"epoch": 0.32409171165127804, |
|
"grad_norm": 1.4493441581726074, |
|
"learning_rate": 3.35435731658559e-05, |
|
"loss": 1.643, |
|
"step": 12803 |
|
}, |
|
{ |
|
"epoch": 0.32487643734534893, |
|
"grad_norm": 1.3813337087631226, |
|
"learning_rate": 3.346459897862552e-05, |
|
"loss": 1.6449, |
|
"step": 12834 |
|
}, |
|
{ |
|
"epoch": 0.32566116303941983, |
|
"grad_norm": 1.5027899742126465, |
|
"learning_rate": 3.338552926954613e-05, |
|
"loss": 1.6497, |
|
"step": 12865 |
|
}, |
|
{ |
|
"epoch": 0.3264458887334907, |
|
"grad_norm": 1.3805309534072876, |
|
"learning_rate": 3.330636493090868e-05, |
|
"loss": 1.6449, |
|
"step": 12896 |
|
}, |
|
{ |
|
"epoch": 0.3272306144275616, |
|
"grad_norm": 1.642248511314392, |
|
"learning_rate": 3.322710685607193e-05, |
|
"loss": 1.6261, |
|
"step": 12927 |
|
}, |
|
{ |
|
"epoch": 0.32801534012163247, |
|
"grad_norm": 1.4579522609710693, |
|
"learning_rate": 3.314775593945251e-05, |
|
"loss": 1.6648, |
|
"step": 12958 |
|
}, |
|
{ |
|
"epoch": 0.32880006581570337, |
|
"grad_norm": 1.3579092025756836, |
|
"learning_rate": 3.3068313076514714e-05, |
|
"loss": 1.6468, |
|
"step": 12989 |
|
}, |
|
{ |
|
"epoch": 0.32958479150977427, |
|
"grad_norm": 1.406051754951477, |
|
"learning_rate": 3.298877916376047e-05, |
|
"loss": 1.6249, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 0.33036951720384516, |
|
"grad_norm": 1.457335114479065, |
|
"learning_rate": 3.290915509871915e-05, |
|
"loss": 1.6353, |
|
"step": 13051 |
|
}, |
|
{ |
|
"epoch": 0.33115424289791606, |
|
"grad_norm": 1.4548041820526123, |
|
"learning_rate": 3.282944177993753e-05, |
|
"loss": 1.6272, |
|
"step": 13082 |
|
}, |
|
{ |
|
"epoch": 0.33193896859198696, |
|
"grad_norm": 1.4140032529830933, |
|
"learning_rate": 3.274964010696957e-05, |
|
"loss": 1.6479, |
|
"step": 13113 |
|
}, |
|
{ |
|
"epoch": 0.3327236942860578, |
|
"grad_norm": 1.3436623811721802, |
|
"learning_rate": 3.266975098036629e-05, |
|
"loss": 1.6452, |
|
"step": 13144 |
|
}, |
|
{ |
|
"epoch": 0.3335084199801287, |
|
"grad_norm": 1.4224274158477783, |
|
"learning_rate": 3.258977530166562e-05, |
|
"loss": 1.6242, |
|
"step": 13175 |
|
}, |
|
{ |
|
"epoch": 0.3342931456741996, |
|
"grad_norm": 1.5661940574645996, |
|
"learning_rate": 3.250971397338227e-05, |
|
"loss": 1.6404, |
|
"step": 13206 |
|
}, |
|
{ |
|
"epoch": 0.3350778713682705, |
|
"grad_norm": 1.4696576595306396, |
|
"learning_rate": 3.2429567898997404e-05, |
|
"loss": 1.6436, |
|
"step": 13237 |
|
}, |
|
{ |
|
"epoch": 0.3358625970623414, |
|
"grad_norm": 1.4438591003417969, |
|
"learning_rate": 3.234933798294859e-05, |
|
"loss": 1.6404, |
|
"step": 13268 |
|
}, |
|
{ |
|
"epoch": 0.3366473227564123, |
|
"grad_norm": 1.4548406600952148, |
|
"learning_rate": 3.2269025130619535e-05, |
|
"loss": 1.6461, |
|
"step": 13299 |
|
}, |
|
{ |
|
"epoch": 0.3374320484504832, |
|
"grad_norm": 1.4180691242218018, |
|
"learning_rate": 3.218863024832985e-05, |
|
"loss": 1.6377, |
|
"step": 13330 |
|
}, |
|
{ |
|
"epoch": 0.3382167741445541, |
|
"grad_norm": 1.4060105085372925, |
|
"learning_rate": 3.2108154243324864e-05, |
|
"loss": 1.6045, |
|
"step": 13361 |
|
}, |
|
{ |
|
"epoch": 0.33900149983862493, |
|
"grad_norm": 1.4134920835494995, |
|
"learning_rate": 3.2027598023765345e-05, |
|
"loss": 1.6264, |
|
"step": 13392 |
|
}, |
|
{ |
|
"epoch": 0.33978622553269583, |
|
"grad_norm": 1.4582122564315796, |
|
"learning_rate": 3.194696249871729e-05, |
|
"loss": 1.623, |
|
"step": 13423 |
|
}, |
|
{ |
|
"epoch": 0.3405709512267667, |
|
"grad_norm": 1.4027389287948608, |
|
"learning_rate": 3.186624857814164e-05, |
|
"loss": 1.6337, |
|
"step": 13454 |
|
}, |
|
{ |
|
"epoch": 0.3413556769208376, |
|
"grad_norm": 1.3397070169448853, |
|
"learning_rate": 3.178545717288401e-05, |
|
"loss": 1.6334, |
|
"step": 13485 |
|
}, |
|
{ |
|
"epoch": 0.3421404026149085, |
|
"grad_norm": 1.5358332395553589, |
|
"learning_rate": 3.170458919466444e-05, |
|
"loss": 1.6393, |
|
"step": 13516 |
|
}, |
|
{ |
|
"epoch": 0.3429251283089794, |
|
"grad_norm": 1.5479260683059692, |
|
"learning_rate": 3.1623645556067063e-05, |
|
"loss": 1.6357, |
|
"step": 13547 |
|
}, |
|
{ |
|
"epoch": 0.3437098540030503, |
|
"grad_norm": 1.3949965238571167, |
|
"learning_rate": 3.154262717052985e-05, |
|
"loss": 1.6325, |
|
"step": 13578 |
|
}, |
|
{ |
|
"epoch": 0.3444945796971212, |
|
"grad_norm": 1.392903208732605, |
|
"learning_rate": 3.146153495233426e-05, |
|
"loss": 1.6071, |
|
"step": 13609 |
|
}, |
|
{ |
|
"epoch": 0.34527930539119206, |
|
"grad_norm": 1.4290788173675537, |
|
"learning_rate": 3.1380369816594944e-05, |
|
"loss": 1.6266, |
|
"step": 13640 |
|
}, |
|
{ |
|
"epoch": 0.34606403108526296, |
|
"grad_norm": 1.4005228281021118, |
|
"learning_rate": 3.129913267924946e-05, |
|
"loss": 1.6391, |
|
"step": 13671 |
|
}, |
|
{ |
|
"epoch": 0.34684875677933386, |
|
"grad_norm": 1.378369927406311, |
|
"learning_rate": 3.121782445704782e-05, |
|
"loss": 1.6495, |
|
"step": 13702 |
|
}, |
|
{ |
|
"epoch": 0.34763348247340475, |
|
"grad_norm": 1.4202784299850464, |
|
"learning_rate": 3.11364460675423e-05, |
|
"loss": 1.637, |
|
"step": 13733 |
|
}, |
|
{ |
|
"epoch": 0.34841820816747565, |
|
"grad_norm": 1.3670291900634766, |
|
"learning_rate": 3.1054998429076934e-05, |
|
"loss": 1.5941, |
|
"step": 13764 |
|
}, |
|
{ |
|
"epoch": 0.34920293386154655, |
|
"grad_norm": 1.3714202642440796, |
|
"learning_rate": 3.097348246077728e-05, |
|
"loss": 1.6096, |
|
"step": 13795 |
|
}, |
|
{ |
|
"epoch": 0.34998765955561745, |
|
"grad_norm": 1.4889552593231201, |
|
"learning_rate": 3.0891899082539924e-05, |
|
"loss": 1.6245, |
|
"step": 13826 |
|
}, |
|
{ |
|
"epoch": 0.35077238524968835, |
|
"grad_norm": 1.4640086889266968, |
|
"learning_rate": 3.0810249215022233e-05, |
|
"loss": 1.6197, |
|
"step": 13857 |
|
}, |
|
{ |
|
"epoch": 0.35155711094375924, |
|
"grad_norm": 1.385380506515503, |
|
"learning_rate": 3.0728533779631865e-05, |
|
"loss": 1.61, |
|
"step": 13888 |
|
}, |
|
{ |
|
"epoch": 0.3523418366378301, |
|
"grad_norm": 1.3958945274353027, |
|
"learning_rate": 3.064675369851637e-05, |
|
"loss": 1.6139, |
|
"step": 13919 |
|
}, |
|
{ |
|
"epoch": 0.353126562331901, |
|
"grad_norm": 1.3746731281280518, |
|
"learning_rate": 3.056490989455289e-05, |
|
"loss": 1.6307, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 0.3539112880259719, |
|
"grad_norm": 1.4196429252624512, |
|
"learning_rate": 3.0483003291337596e-05, |
|
"loss": 1.6192, |
|
"step": 13981 |
|
}, |
|
{ |
|
"epoch": 0.3546960137200428, |
|
"grad_norm": 1.3648637533187866, |
|
"learning_rate": 3.040103481317539e-05, |
|
"loss": 1.6124, |
|
"step": 14012 |
|
}, |
|
{ |
|
"epoch": 0.3554807394141137, |
|
"grad_norm": 1.422004222869873, |
|
"learning_rate": 3.03190053850694e-05, |
|
"loss": 1.6288, |
|
"step": 14043 |
|
}, |
|
{ |
|
"epoch": 0.3562654651081846, |
|
"grad_norm": 1.4687801599502563, |
|
"learning_rate": 3.0236915932710573e-05, |
|
"loss": 1.6118, |
|
"step": 14074 |
|
}, |
|
{ |
|
"epoch": 0.3570501908022555, |
|
"grad_norm": 1.30635404586792, |
|
"learning_rate": 3.0154767382467232e-05, |
|
"loss": 1.6341, |
|
"step": 14105 |
|
}, |
|
{ |
|
"epoch": 0.35783491649632637, |
|
"grad_norm": 1.4216945171356201, |
|
"learning_rate": 3.0072560661374582e-05, |
|
"loss": 1.6385, |
|
"step": 14136 |
|
}, |
|
{ |
|
"epoch": 0.3586196421903972, |
|
"grad_norm": 1.4296518564224243, |
|
"learning_rate": 2.999029669712431e-05, |
|
"loss": 1.6262, |
|
"step": 14167 |
|
}, |
|
{ |
|
"epoch": 0.3594043678844681, |
|
"grad_norm": 1.4529691934585571, |
|
"learning_rate": 2.990797641805408e-05, |
|
"loss": 1.6136, |
|
"step": 14198 |
|
}, |
|
{ |
|
"epoch": 0.360189093578539, |
|
"grad_norm": 1.389478325843811, |
|
"learning_rate": 2.982560075313704e-05, |
|
"loss": 1.6263, |
|
"step": 14229 |
|
}, |
|
{ |
|
"epoch": 0.3609738192726099, |
|
"grad_norm": 1.3917667865753174, |
|
"learning_rate": 2.9743170631971368e-05, |
|
"loss": 1.6456, |
|
"step": 14260 |
|
}, |
|
{ |
|
"epoch": 0.3617585449666808, |
|
"grad_norm": 1.3452563285827637, |
|
"learning_rate": 2.9660686984769792e-05, |
|
"loss": 1.6284, |
|
"step": 14291 |
|
}, |
|
{ |
|
"epoch": 0.3625432706607517, |
|
"grad_norm": 1.421159029006958, |
|
"learning_rate": 2.9578150742349047e-05, |
|
"loss": 1.6232, |
|
"step": 14322 |
|
}, |
|
{ |
|
"epoch": 0.3633279963548226, |
|
"grad_norm": 1.4312077760696411, |
|
"learning_rate": 2.949556283611942e-05, |
|
"loss": 1.6006, |
|
"step": 14353 |
|
}, |
|
{ |
|
"epoch": 0.3641127220488935, |
|
"grad_norm": 1.4271692037582397, |
|
"learning_rate": 2.9412924198074206e-05, |
|
"loss": 1.6177, |
|
"step": 14384 |
|
}, |
|
{ |
|
"epoch": 0.36489744774296434, |
|
"grad_norm": 1.3584555387496948, |
|
"learning_rate": 2.9330235760779208e-05, |
|
"loss": 1.6148, |
|
"step": 14415 |
|
}, |
|
{ |
|
"epoch": 0.36568217343703524, |
|
"grad_norm": 1.3882123231887817, |
|
"learning_rate": 2.9247498457362188e-05, |
|
"loss": 1.6327, |
|
"step": 14446 |
|
}, |
|
{ |
|
"epoch": 0.36646689913110614, |
|
"grad_norm": 1.540114402770996, |
|
"learning_rate": 2.9164713221502373e-05, |
|
"loss": 1.6052, |
|
"step": 14477 |
|
}, |
|
{ |
|
"epoch": 0.36725162482517704, |
|
"grad_norm": 1.3554641008377075, |
|
"learning_rate": 2.9081880987419912e-05, |
|
"loss": 1.6091, |
|
"step": 14508 |
|
}, |
|
{ |
|
"epoch": 0.36803635051924793, |
|
"grad_norm": 1.3693712949752808, |
|
"learning_rate": 2.8999002689865296e-05, |
|
"loss": 1.5936, |
|
"step": 14539 |
|
}, |
|
{ |
|
"epoch": 0.36882107621331883, |
|
"grad_norm": 1.354278564453125, |
|
"learning_rate": 2.8916079264108852e-05, |
|
"loss": 1.612, |
|
"step": 14570 |
|
}, |
|
{ |
|
"epoch": 0.36960580190738973, |
|
"grad_norm": 1.3731021881103516, |
|
"learning_rate": 2.883311164593017e-05, |
|
"loss": 1.6064, |
|
"step": 14601 |
|
}, |
|
{ |
|
"epoch": 0.37039052760146063, |
|
"grad_norm": 1.3914356231689453, |
|
"learning_rate": 2.875010077160754e-05, |
|
"loss": 1.6036, |
|
"step": 14632 |
|
}, |
|
{ |
|
"epoch": 0.37117525329553147, |
|
"grad_norm": 1.4811164140701294, |
|
"learning_rate": 2.866704757790741e-05, |
|
"loss": 1.6195, |
|
"step": 14663 |
|
}, |
|
{ |
|
"epoch": 0.37195997898960237, |
|
"grad_norm": 1.4619332551956177, |
|
"learning_rate": 2.858395300207376e-05, |
|
"loss": 1.6315, |
|
"step": 14694 |
|
}, |
|
{ |
|
"epoch": 0.37274470468367327, |
|
"grad_norm": 1.456950306892395, |
|
"learning_rate": 2.8500817981817607e-05, |
|
"loss": 1.6276, |
|
"step": 14725 |
|
}, |
|
{ |
|
"epoch": 0.37352943037774416, |
|
"grad_norm": 5.129410266876221, |
|
"learning_rate": 2.8417643455306336e-05, |
|
"loss": 1.6234, |
|
"step": 14756 |
|
}, |
|
{ |
|
"epoch": 0.37431415607181506, |
|
"grad_norm": 1.3831191062927246, |
|
"learning_rate": 2.8334430361153185e-05, |
|
"loss": 1.6163, |
|
"step": 14787 |
|
}, |
|
{ |
|
"epoch": 0.37509888176588596, |
|
"grad_norm": 1.3817623853683472, |
|
"learning_rate": 2.8251179638406612e-05, |
|
"loss": 1.6206, |
|
"step": 14818 |
|
}, |
|
{ |
|
"epoch": 0.37588360745995686, |
|
"grad_norm": 1.5285260677337646, |
|
"learning_rate": 2.8167892226539704e-05, |
|
"loss": 1.6117, |
|
"step": 14849 |
|
}, |
|
{ |
|
"epoch": 0.37666833315402776, |
|
"grad_norm": 1.403324007987976, |
|
"learning_rate": 2.8084569065439588e-05, |
|
"loss": 1.5962, |
|
"step": 14880 |
|
}, |
|
{ |
|
"epoch": 0.3774530588480986, |
|
"grad_norm": 1.3314014673233032, |
|
"learning_rate": 2.8001211095396807e-05, |
|
"loss": 1.6116, |
|
"step": 14911 |
|
}, |
|
{ |
|
"epoch": 0.3782377845421695, |
|
"grad_norm": 1.4300462007522583, |
|
"learning_rate": 2.791781925709473e-05, |
|
"loss": 1.6234, |
|
"step": 14942 |
|
}, |
|
{ |
|
"epoch": 0.3790225102362404, |
|
"grad_norm": 1.424811601638794, |
|
"learning_rate": 2.7834394491598908e-05, |
|
"loss": 1.5986, |
|
"step": 14973 |
|
}, |
|
{ |
|
"epoch": 0.3798072359303113, |
|
"grad_norm": 1.3818182945251465, |
|
"learning_rate": 2.7750937740346485e-05, |
|
"loss": 1.6012, |
|
"step": 15004 |
|
}, |
|
{ |
|
"epoch": 0.3805919616243822, |
|
"grad_norm": 1.4053683280944824, |
|
"learning_rate": 2.7667449945135564e-05, |
|
"loss": 1.6018, |
|
"step": 15035 |
|
}, |
|
{ |
|
"epoch": 0.3813766873184531, |
|
"grad_norm": 1.5093421936035156, |
|
"learning_rate": 2.7583932048114557e-05, |
|
"loss": 1.61, |
|
"step": 15066 |
|
}, |
|
{ |
|
"epoch": 0.382161413012524, |
|
"grad_norm": 1.412494421005249, |
|
"learning_rate": 2.7500384991771587e-05, |
|
"loss": 1.613, |
|
"step": 15097 |
|
}, |
|
{ |
|
"epoch": 0.3829461387065949, |
|
"grad_norm": 1.335167646408081, |
|
"learning_rate": 2.7416809718923825e-05, |
|
"loss": 1.6197, |
|
"step": 15128 |
|
}, |
|
{ |
|
"epoch": 0.3837308644006657, |
|
"grad_norm": 1.334786295890808, |
|
"learning_rate": 2.7333207172706864e-05, |
|
"loss": 1.6284, |
|
"step": 15159 |
|
}, |
|
{ |
|
"epoch": 0.3845155900947366, |
|
"grad_norm": 1.4039522409439087, |
|
"learning_rate": 2.7249578296564088e-05, |
|
"loss": 1.5889, |
|
"step": 15190 |
|
}, |
|
{ |
|
"epoch": 0.3853003157888075, |
|
"grad_norm": 1.4196487665176392, |
|
"learning_rate": 2.7165924034235973e-05, |
|
"loss": 1.6132, |
|
"step": 15221 |
|
}, |
|
{ |
|
"epoch": 0.3860850414828784, |
|
"grad_norm": 1.4701744318008423, |
|
"learning_rate": 2.708224532974953e-05, |
|
"loss": 1.6009, |
|
"step": 15252 |
|
}, |
|
{ |
|
"epoch": 0.3868697671769493, |
|
"grad_norm": 1.319935917854309, |
|
"learning_rate": 2.6998543127407538e-05, |
|
"loss": 1.6333, |
|
"step": 15283 |
|
}, |
|
{ |
|
"epoch": 0.3876544928710202, |
|
"grad_norm": 1.3962234258651733, |
|
"learning_rate": 2.6914818371777988e-05, |
|
"loss": 1.6175, |
|
"step": 15314 |
|
}, |
|
{ |
|
"epoch": 0.3884392185650911, |
|
"grad_norm": 1.4284230470657349, |
|
"learning_rate": 2.6831072007683373e-05, |
|
"loss": 1.6007, |
|
"step": 15345 |
|
}, |
|
{ |
|
"epoch": 0.389223944259162, |
|
"grad_norm": 1.298251748085022, |
|
"learning_rate": 2.6747304980190018e-05, |
|
"loss": 1.605, |
|
"step": 15376 |
|
}, |
|
{ |
|
"epoch": 0.39000866995323286, |
|
"grad_norm": 1.294994831085205, |
|
"learning_rate": 2.6663518234597453e-05, |
|
"loss": 1.6025, |
|
"step": 15407 |
|
}, |
|
{ |
|
"epoch": 0.39079339564730375, |
|
"grad_norm": 1.440958023071289, |
|
"learning_rate": 2.6579712716427696e-05, |
|
"loss": 1.6002, |
|
"step": 15438 |
|
}, |
|
{ |
|
"epoch": 0.39157812134137465, |
|
"grad_norm": 1.439590573310852, |
|
"learning_rate": 2.6495889371414652e-05, |
|
"loss": 1.6025, |
|
"step": 15469 |
|
}, |
|
{ |
|
"epoch": 0.39236284703544555, |
|
"grad_norm": 1.4235502481460571, |
|
"learning_rate": 2.6412049145493367e-05, |
|
"loss": 1.5993, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.39314757272951645, |
|
"grad_norm": 1.4449518918991089, |
|
"learning_rate": 2.632819298478939e-05, |
|
"loss": 1.63, |
|
"step": 15531 |
|
}, |
|
{ |
|
"epoch": 0.39393229842358735, |
|
"grad_norm": 1.4422321319580078, |
|
"learning_rate": 2.6244321835608105e-05, |
|
"loss": 1.6193, |
|
"step": 15562 |
|
}, |
|
{ |
|
"epoch": 0.39471702411765824, |
|
"grad_norm": 1.4232275485992432, |
|
"learning_rate": 2.6160436644424024e-05, |
|
"loss": 1.6193, |
|
"step": 15593 |
|
}, |
|
{ |
|
"epoch": 0.39550174981172914, |
|
"grad_norm": 1.5187265872955322, |
|
"learning_rate": 2.6076538357870133e-05, |
|
"loss": 1.618, |
|
"step": 15624 |
|
}, |
|
{ |
|
"epoch": 0.3962864755058, |
|
"grad_norm": 1.4493205547332764, |
|
"learning_rate": 2.5992627922727196e-05, |
|
"loss": 1.6082, |
|
"step": 15655 |
|
}, |
|
{ |
|
"epoch": 0.3970712011998709, |
|
"grad_norm": 1.5100423097610474, |
|
"learning_rate": 2.5908706285913066e-05, |
|
"loss": 1.6081, |
|
"step": 15686 |
|
}, |
|
{ |
|
"epoch": 0.3978559268939418, |
|
"grad_norm": 1.465114712715149, |
|
"learning_rate": 2.5824774394472008e-05, |
|
"loss": 1.6125, |
|
"step": 15717 |
|
}, |
|
{ |
|
"epoch": 0.3986406525880127, |
|
"grad_norm": 1.4160761833190918, |
|
"learning_rate": 2.5740833195563996e-05, |
|
"loss": 1.5951, |
|
"step": 15748 |
|
}, |
|
{ |
|
"epoch": 0.3994253782820836, |
|
"grad_norm": 1.381658673286438, |
|
"learning_rate": 2.5656883636454067e-05, |
|
"loss": 1.6051, |
|
"step": 15779 |
|
}, |
|
{ |
|
"epoch": 0.4002101039761545, |
|
"grad_norm": 1.3883142471313477, |
|
"learning_rate": 2.557292666450159e-05, |
|
"loss": 1.6039, |
|
"step": 15810 |
|
}, |
|
{ |
|
"epoch": 0.4009948296702254, |
|
"grad_norm": 1.506911039352417, |
|
"learning_rate": 2.5488963227149566e-05, |
|
"loss": 1.5761, |
|
"step": 15841 |
|
}, |
|
{ |
|
"epoch": 0.40177955536429627, |
|
"grad_norm": 1.4450113773345947, |
|
"learning_rate": 2.5404994271913983e-05, |
|
"loss": 1.5734, |
|
"step": 15872 |
|
}, |
|
{ |
|
"epoch": 0.4025642810583671, |
|
"grad_norm": 1.3970619440078735, |
|
"learning_rate": 2.5321020746373085e-05, |
|
"loss": 1.6094, |
|
"step": 15903 |
|
}, |
|
{ |
|
"epoch": 0.403349006752438, |
|
"grad_norm": 1.4761073589324951, |
|
"learning_rate": 2.52370435981567e-05, |
|
"loss": 1.6075, |
|
"step": 15934 |
|
}, |
|
{ |
|
"epoch": 0.4041337324465089, |
|
"grad_norm": 1.3969392776489258, |
|
"learning_rate": 2.5153063774935533e-05, |
|
"loss": 1.5788, |
|
"step": 15965 |
|
}, |
|
{ |
|
"epoch": 0.4049184581405798, |
|
"grad_norm": 1.3772737979888916, |
|
"learning_rate": 2.506908222441045e-05, |
|
"loss": 1.61, |
|
"step": 15996 |
|
}, |
|
{ |
|
"epoch": 0.4057031838346507, |
|
"grad_norm": 1.3969396352767944, |
|
"learning_rate": 2.498509989430187e-05, |
|
"loss": 1.5943, |
|
"step": 16027 |
|
}, |
|
{ |
|
"epoch": 0.4064879095287216, |
|
"grad_norm": 1.3052096366882324, |
|
"learning_rate": 2.4901117732338958e-05, |
|
"loss": 1.61, |
|
"step": 16058 |
|
}, |
|
{ |
|
"epoch": 0.4072726352227925, |
|
"grad_norm": 1.394612193107605, |
|
"learning_rate": 2.481713668624899e-05, |
|
"loss": 1.6018, |
|
"step": 16089 |
|
}, |
|
{ |
|
"epoch": 0.4080573609168634, |
|
"grad_norm": 1.3575886487960815, |
|
"learning_rate": 2.4733157703746663e-05, |
|
"loss": 1.5883, |
|
"step": 16120 |
|
}, |
|
{ |
|
"epoch": 0.40884208661093424, |
|
"grad_norm": 1.3952176570892334, |
|
"learning_rate": 2.4649181732523392e-05, |
|
"loss": 1.6152, |
|
"step": 16151 |
|
}, |
|
{ |
|
"epoch": 0.40962681230500514, |
|
"grad_norm": 1.5711455345153809, |
|
"learning_rate": 2.4565209720236582e-05, |
|
"loss": 1.61, |
|
"step": 16182 |
|
}, |
|
{ |
|
"epoch": 0.41041153799907604, |
|
"grad_norm": 1.5258722305297852, |
|
"learning_rate": 2.4481242614498975e-05, |
|
"loss": 1.628, |
|
"step": 16213 |
|
}, |
|
{ |
|
"epoch": 0.41119626369314694, |
|
"grad_norm": 1.425764799118042, |
|
"learning_rate": 2.439728136286796e-05, |
|
"loss": 1.5872, |
|
"step": 16244 |
|
}, |
|
{ |
|
"epoch": 0.41198098938721783, |
|
"grad_norm": 1.3165446519851685, |
|
"learning_rate": 2.4313326912834852e-05, |
|
"loss": 1.6008, |
|
"step": 16275 |
|
}, |
|
{ |
|
"epoch": 0.41276571508128873, |
|
"grad_norm": 1.386579155921936, |
|
"learning_rate": 2.4229380211814206e-05, |
|
"loss": 1.5783, |
|
"step": 16306 |
|
}, |
|
{ |
|
"epoch": 0.41355044077535963, |
|
"grad_norm": 1.464693307876587, |
|
"learning_rate": 2.4145442207133124e-05, |
|
"loss": 1.5947, |
|
"step": 16337 |
|
}, |
|
{ |
|
"epoch": 0.4143351664694305, |
|
"grad_norm": 1.334782600402832, |
|
"learning_rate": 2.406151384602059e-05, |
|
"loss": 1.5886, |
|
"step": 16368 |
|
}, |
|
{ |
|
"epoch": 0.41511989216350137, |
|
"grad_norm": 1.4115489721298218, |
|
"learning_rate": 2.3977596075596747e-05, |
|
"loss": 1.5821, |
|
"step": 16399 |
|
}, |
|
{ |
|
"epoch": 0.41590461785757227, |
|
"grad_norm": 1.391065001487732, |
|
"learning_rate": 2.3893689842862223e-05, |
|
"loss": 1.6141, |
|
"step": 16430 |
|
}, |
|
{ |
|
"epoch": 0.41668934355164317, |
|
"grad_norm": 1.4244657754898071, |
|
"learning_rate": 2.3809796094687475e-05, |
|
"loss": 1.6008, |
|
"step": 16461 |
|
}, |
|
{ |
|
"epoch": 0.41747406924571406, |
|
"grad_norm": 1.3113791942596436, |
|
"learning_rate": 2.372591577780202e-05, |
|
"loss": 1.608, |
|
"step": 16492 |
|
}, |
|
{ |
|
"epoch": 0.41825879493978496, |
|
"grad_norm": 1.4262186288833618, |
|
"learning_rate": 2.3642049838783838e-05, |
|
"loss": 1.5801, |
|
"step": 16523 |
|
}, |
|
{ |
|
"epoch": 0.41904352063385586, |
|
"grad_norm": 1.4219175577163696, |
|
"learning_rate": 2.3558199224048666e-05, |
|
"loss": 1.592, |
|
"step": 16554 |
|
}, |
|
{ |
|
"epoch": 0.41982824632792676, |
|
"grad_norm": 1.4542045593261719, |
|
"learning_rate": 2.347436487983929e-05, |
|
"loss": 1.6062, |
|
"step": 16585 |
|
}, |
|
{ |
|
"epoch": 0.42061297202199766, |
|
"grad_norm": 1.4484211206436157, |
|
"learning_rate": 2.3390547752214888e-05, |
|
"loss": 1.6042, |
|
"step": 16616 |
|
}, |
|
{ |
|
"epoch": 0.4213976977160685, |
|
"grad_norm": 1.4561681747436523, |
|
"learning_rate": 2.330674878704035e-05, |
|
"loss": 1.617, |
|
"step": 16647 |
|
}, |
|
{ |
|
"epoch": 0.4221824234101394, |
|
"grad_norm": 1.4250808954238892, |
|
"learning_rate": 2.322296892997561e-05, |
|
"loss": 1.5947, |
|
"step": 16678 |
|
}, |
|
{ |
|
"epoch": 0.4229671491042103, |
|
"grad_norm": 1.3762766122817993, |
|
"learning_rate": 2.313920912646497e-05, |
|
"loss": 1.5962, |
|
"step": 16709 |
|
}, |
|
{ |
|
"epoch": 0.4237518747982812, |
|
"grad_norm": 1.3508645296096802, |
|
"learning_rate": 2.305547032172643e-05, |
|
"loss": 1.5969, |
|
"step": 16740 |
|
}, |
|
{ |
|
"epoch": 0.4245366004923521, |
|
"grad_norm": 1.4839844703674316, |
|
"learning_rate": 2.2971753460741014e-05, |
|
"loss": 1.5697, |
|
"step": 16771 |
|
}, |
|
{ |
|
"epoch": 0.425321326186423, |
|
"grad_norm": 1.4027475118637085, |
|
"learning_rate": 2.288805948824212e-05, |
|
"loss": 1.5758, |
|
"step": 16802 |
|
}, |
|
{ |
|
"epoch": 0.4261060518804939, |
|
"grad_norm": 1.3288599252700806, |
|
"learning_rate": 2.2804389348704858e-05, |
|
"loss": 1.5817, |
|
"step": 16833 |
|
}, |
|
{ |
|
"epoch": 0.4268907775745648, |
|
"grad_norm": 1.411028265953064, |
|
"learning_rate": 2.2720743986335374e-05, |
|
"loss": 1.6059, |
|
"step": 16864 |
|
}, |
|
{ |
|
"epoch": 0.4276755032686356, |
|
"grad_norm": 1.4803740978240967, |
|
"learning_rate": 2.2637124345060233e-05, |
|
"loss": 1.6061, |
|
"step": 16895 |
|
}, |
|
{ |
|
"epoch": 0.4284602289627065, |
|
"grad_norm": 1.6195276975631714, |
|
"learning_rate": 2.2553531368515695e-05, |
|
"loss": 1.5948, |
|
"step": 16926 |
|
}, |
|
{ |
|
"epoch": 0.4292449546567774, |
|
"grad_norm": 1.368160605430603, |
|
"learning_rate": 2.2469966000037144e-05, |
|
"loss": 1.5884, |
|
"step": 16957 |
|
}, |
|
{ |
|
"epoch": 0.4300296803508483, |
|
"grad_norm": 2.9462714195251465, |
|
"learning_rate": 2.2386429182648417e-05, |
|
"loss": 1.5834, |
|
"step": 16988 |
|
}, |
|
{ |
|
"epoch": 0.4308144060449192, |
|
"grad_norm": 1.319602370262146, |
|
"learning_rate": 2.230292185905114e-05, |
|
"loss": 1.571, |
|
"step": 17019 |
|
}, |
|
{ |
|
"epoch": 0.4315991317389901, |
|
"grad_norm": 1.412001371383667, |
|
"learning_rate": 2.2219444971614116e-05, |
|
"loss": 1.6091, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 0.432383857433061, |
|
"grad_norm": 1.4459586143493652, |
|
"learning_rate": 2.2135999462362655e-05, |
|
"loss": 1.5803, |
|
"step": 17081 |
|
}, |
|
{ |
|
"epoch": 0.4331685831271319, |
|
"grad_norm": 1.3342795372009277, |
|
"learning_rate": 2.2052586272968003e-05, |
|
"loss": 1.5809, |
|
"step": 17112 |
|
}, |
|
{ |
|
"epoch": 0.43395330882120275, |
|
"grad_norm": 1.3263877630233765, |
|
"learning_rate": 2.196920634473666e-05, |
|
"loss": 1.5742, |
|
"step": 17143 |
|
}, |
|
{ |
|
"epoch": 0.43473803451527365, |
|
"grad_norm": 1.3818809986114502, |
|
"learning_rate": 2.1885860618599787e-05, |
|
"loss": 1.5701, |
|
"step": 17174 |
|
}, |
|
{ |
|
"epoch": 0.43552276020934455, |
|
"grad_norm": 1.4324009418487549, |
|
"learning_rate": 2.1802550035102577e-05, |
|
"loss": 1.5622, |
|
"step": 17205 |
|
}, |
|
{ |
|
"epoch": 0.43630748590341545, |
|
"grad_norm": 1.3489223718643188, |
|
"learning_rate": 2.171927553439363e-05, |
|
"loss": 1.5737, |
|
"step": 17236 |
|
}, |
|
{ |
|
"epoch": 0.43709221159748635, |
|
"grad_norm": 1.6844401359558105, |
|
"learning_rate": 2.1636038056214376e-05, |
|
"loss": 1.5916, |
|
"step": 17267 |
|
}, |
|
{ |
|
"epoch": 0.43787693729155724, |
|
"grad_norm": 1.3632712364196777, |
|
"learning_rate": 2.155283853988844e-05, |
|
"loss": 1.6055, |
|
"step": 17298 |
|
}, |
|
{ |
|
"epoch": 0.43866166298562814, |
|
"grad_norm": 1.4866870641708374, |
|
"learning_rate": 2.146967792431106e-05, |
|
"loss": 1.5858, |
|
"step": 17329 |
|
}, |
|
{ |
|
"epoch": 0.43944638867969904, |
|
"grad_norm": 1.5456846952438354, |
|
"learning_rate": 2.138655714793849e-05, |
|
"loss": 1.6098, |
|
"step": 17360 |
|
}, |
|
{ |
|
"epoch": 0.44023111437376994, |
|
"grad_norm": 1.4177597761154175, |
|
"learning_rate": 2.1303477148777367e-05, |
|
"loss": 1.5833, |
|
"step": 17391 |
|
}, |
|
{ |
|
"epoch": 0.4410158400678408, |
|
"grad_norm": 1.4126933813095093, |
|
"learning_rate": 2.122043886437421e-05, |
|
"loss": 1.599, |
|
"step": 17422 |
|
}, |
|
{ |
|
"epoch": 0.4418005657619117, |
|
"grad_norm": 1.4183374643325806, |
|
"learning_rate": 2.1137443231804765e-05, |
|
"loss": 1.5941, |
|
"step": 17453 |
|
}, |
|
{ |
|
"epoch": 0.4425852914559826, |
|
"grad_norm": 1.4230761528015137, |
|
"learning_rate": 2.105449118766347e-05, |
|
"loss": 1.5743, |
|
"step": 17484 |
|
}, |
|
{ |
|
"epoch": 0.4433700171500535, |
|
"grad_norm": 1.6844847202301025, |
|
"learning_rate": 2.097158366805287e-05, |
|
"loss": 1.5672, |
|
"step": 17515 |
|
}, |
|
{ |
|
"epoch": 0.4441547428441244, |
|
"grad_norm": 1.410435438156128, |
|
"learning_rate": 2.0888721608573047e-05, |
|
"loss": 1.5896, |
|
"step": 17546 |
|
}, |
|
{ |
|
"epoch": 0.44493946853819527, |
|
"grad_norm": 1.3948931694030762, |
|
"learning_rate": 2.0805905944311087e-05, |
|
"loss": 1.5899, |
|
"step": 17577 |
|
}, |
|
{ |
|
"epoch": 0.44572419423226617, |
|
"grad_norm": 1.3747113943099976, |
|
"learning_rate": 2.0723137609830497e-05, |
|
"loss": 1.5576, |
|
"step": 17608 |
|
}, |
|
{ |
|
"epoch": 0.44650891992633707, |
|
"grad_norm": 1.477161169052124, |
|
"learning_rate": 2.0640417539160686e-05, |
|
"loss": 1.5576, |
|
"step": 17639 |
|
}, |
|
{ |
|
"epoch": 0.4472936456204079, |
|
"grad_norm": 1.372091293334961, |
|
"learning_rate": 2.0557746665786427e-05, |
|
"loss": 1.5958, |
|
"step": 17670 |
|
}, |
|
{ |
|
"epoch": 0.4480783713144788, |
|
"grad_norm": 1.361820936203003, |
|
"learning_rate": 2.0475125922637256e-05, |
|
"loss": 1.5917, |
|
"step": 17701 |
|
}, |
|
{ |
|
"epoch": 0.4488630970085497, |
|
"grad_norm": 1.367297887802124, |
|
"learning_rate": 2.0392556242077047e-05, |
|
"loss": 1.5965, |
|
"step": 17732 |
|
}, |
|
{ |
|
"epoch": 0.4496478227026206, |
|
"grad_norm": 1.538565754890442, |
|
"learning_rate": 2.031003855589343e-05, |
|
"loss": 1.5814, |
|
"step": 17763 |
|
}, |
|
{ |
|
"epoch": 0.4504325483966915, |
|
"grad_norm": 1.4618374109268188, |
|
"learning_rate": 2.022757379528727e-05, |
|
"loss": 1.5852, |
|
"step": 17794 |
|
}, |
|
{ |
|
"epoch": 0.4512172740907624, |
|
"grad_norm": 1.3954309225082397, |
|
"learning_rate": 2.0145162890862184e-05, |
|
"loss": 1.5576, |
|
"step": 17825 |
|
}, |
|
{ |
|
"epoch": 0.4520019997848333, |
|
"grad_norm": 1.33854079246521, |
|
"learning_rate": 2.0062806772614022e-05, |
|
"loss": 1.5793, |
|
"step": 17856 |
|
}, |
|
{ |
|
"epoch": 0.4527867254789042, |
|
"grad_norm": 1.4751428365707397, |
|
"learning_rate": 1.9980506369920392e-05, |
|
"loss": 1.5831, |
|
"step": 17887 |
|
}, |
|
{ |
|
"epoch": 0.45357145117297504, |
|
"grad_norm": 1.3836451768875122, |
|
"learning_rate": 1.989826261153015e-05, |
|
"loss": 1.5967, |
|
"step": 17918 |
|
}, |
|
{ |
|
"epoch": 0.45435617686704594, |
|
"grad_norm": 1.4987123012542725, |
|
"learning_rate": 1.9816076425552923e-05, |
|
"loss": 1.5953, |
|
"step": 17949 |
|
}, |
|
{ |
|
"epoch": 0.45514090256111683, |
|
"grad_norm": 1.3838002681732178, |
|
"learning_rate": 1.9733948739448676e-05, |
|
"loss": 1.5614, |
|
"step": 17980 |
|
}, |
|
{ |
|
"epoch": 0.45592562825518773, |
|
"grad_norm": 1.358023762702942, |
|
"learning_rate": 1.9651880480017155e-05, |
|
"loss": 1.5737, |
|
"step": 18011 |
|
}, |
|
{ |
|
"epoch": 0.45671035394925863, |
|
"grad_norm": 1.3181227445602417, |
|
"learning_rate": 1.9569872573387516e-05, |
|
"loss": 1.5806, |
|
"step": 18042 |
|
}, |
|
{ |
|
"epoch": 0.4574950796433295, |
|
"grad_norm": 1.3574905395507812, |
|
"learning_rate": 1.9487925945007854e-05, |
|
"loss": 1.5779, |
|
"step": 18073 |
|
}, |
|
{ |
|
"epoch": 0.4582798053374004, |
|
"grad_norm": 1.3550188541412354, |
|
"learning_rate": 1.9406041519634726e-05, |
|
"loss": 1.5723, |
|
"step": 18104 |
|
}, |
|
{ |
|
"epoch": 0.4590645310314713, |
|
"grad_norm": 1.3672763109207153, |
|
"learning_rate": 1.932422022132275e-05, |
|
"loss": 1.5869, |
|
"step": 18135 |
|
}, |
|
{ |
|
"epoch": 0.45984925672554217, |
|
"grad_norm": 1.428689956665039, |
|
"learning_rate": 1.924246297341414e-05, |
|
"loss": 1.5743, |
|
"step": 18166 |
|
}, |
|
{ |
|
"epoch": 0.46063398241961306, |
|
"grad_norm": 1.3313350677490234, |
|
"learning_rate": 1.9160770698528338e-05, |
|
"loss": 1.5836, |
|
"step": 18197 |
|
}, |
|
{ |
|
"epoch": 0.46141870811368396, |
|
"grad_norm": 1.3049378395080566, |
|
"learning_rate": 1.907914431855156e-05, |
|
"loss": 1.5753, |
|
"step": 18228 |
|
}, |
|
{ |
|
"epoch": 0.46220343380775486, |
|
"grad_norm": 1.3737244606018066, |
|
"learning_rate": 1.8997584754626412e-05, |
|
"loss": 1.589, |
|
"step": 18259 |
|
}, |
|
{ |
|
"epoch": 0.46298815950182576, |
|
"grad_norm": 1.4522390365600586, |
|
"learning_rate": 1.8916092927141486e-05, |
|
"loss": 1.5898, |
|
"step": 18290 |
|
}, |
|
{ |
|
"epoch": 0.46377288519589666, |
|
"grad_norm": 1.3189274072647095, |
|
"learning_rate": 1.883466975572098e-05, |
|
"loss": 1.5721, |
|
"step": 18321 |
|
}, |
|
{ |
|
"epoch": 0.46455761088996755, |
|
"grad_norm": 1.3040895462036133, |
|
"learning_rate": 1.8753316159214312e-05, |
|
"loss": 1.58, |
|
"step": 18352 |
|
}, |
|
{ |
|
"epoch": 0.46534233658403845, |
|
"grad_norm": 1.3528228998184204, |
|
"learning_rate": 1.8672033055685766e-05, |
|
"loss": 1.5812, |
|
"step": 18383 |
|
}, |
|
{ |
|
"epoch": 0.4661270622781093, |
|
"grad_norm": 1.3759435415267944, |
|
"learning_rate": 1.8590821362404116e-05, |
|
"loss": 1.5905, |
|
"step": 18414 |
|
}, |
|
{ |
|
"epoch": 0.4669117879721802, |
|
"grad_norm": 1.374550223350525, |
|
"learning_rate": 1.8509681995832294e-05, |
|
"loss": 1.5737, |
|
"step": 18445 |
|
}, |
|
{ |
|
"epoch": 0.4676965136662511, |
|
"grad_norm": 1.4290833473205566, |
|
"learning_rate": 1.8428615871617004e-05, |
|
"loss": 1.577, |
|
"step": 18476 |
|
}, |
|
{ |
|
"epoch": 0.468481239360322, |
|
"grad_norm": 1.287758231163025, |
|
"learning_rate": 1.8347623904578448e-05, |
|
"loss": 1.5652, |
|
"step": 18507 |
|
}, |
|
{ |
|
"epoch": 0.4692659650543929, |
|
"grad_norm": 1.3034193515777588, |
|
"learning_rate": 1.8266707008699975e-05, |
|
"loss": 1.5708, |
|
"step": 18538 |
|
}, |
|
{ |
|
"epoch": 0.4700506907484638, |
|
"grad_norm": 1.3413418531417847, |
|
"learning_rate": 1.818586609711774e-05, |
|
"loss": 1.5629, |
|
"step": 18569 |
|
}, |
|
{ |
|
"epoch": 0.4708354164425347, |
|
"grad_norm": 1.3434704542160034, |
|
"learning_rate": 1.8105102082110462e-05, |
|
"loss": 1.5726, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.4716201421366056, |
|
"grad_norm": 1.3321512937545776, |
|
"learning_rate": 1.8024415875089058e-05, |
|
"loss": 1.5767, |
|
"step": 18631 |
|
}, |
|
{ |
|
"epoch": 0.4724048678306764, |
|
"grad_norm": 1.3440663814544678, |
|
"learning_rate": 1.7943808386586407e-05, |
|
"loss": 1.5971, |
|
"step": 18662 |
|
}, |
|
{ |
|
"epoch": 0.4731895935247473, |
|
"grad_norm": 1.356490135192871, |
|
"learning_rate": 1.7863280526247073e-05, |
|
"loss": 1.5511, |
|
"step": 18693 |
|
}, |
|
{ |
|
"epoch": 0.4739743192188182, |
|
"grad_norm": 1.5594719648361206, |
|
"learning_rate": 1.7782833202817003e-05, |
|
"loss": 1.5807, |
|
"step": 18724 |
|
}, |
|
{ |
|
"epoch": 0.4747590449128891, |
|
"grad_norm": 1.3007055521011353, |
|
"learning_rate": 1.7702467324133327e-05, |
|
"loss": 1.5864, |
|
"step": 18755 |
|
}, |
|
{ |
|
"epoch": 0.47554377060696, |
|
"grad_norm": 1.3085851669311523, |
|
"learning_rate": 1.7622183797114042e-05, |
|
"loss": 1.5624, |
|
"step": 18786 |
|
}, |
|
{ |
|
"epoch": 0.4763284963010309, |
|
"grad_norm": 1.4323654174804688, |
|
"learning_rate": 1.7541983527747838e-05, |
|
"loss": 1.5759, |
|
"step": 18817 |
|
}, |
|
{ |
|
"epoch": 0.4771132219951018, |
|
"grad_norm": 1.6249394416809082, |
|
"learning_rate": 1.746186742108387e-05, |
|
"loss": 1.5853, |
|
"step": 18848 |
|
}, |
|
{ |
|
"epoch": 0.4778979476891727, |
|
"grad_norm": 1.4717755317687988, |
|
"learning_rate": 1.73818363812215e-05, |
|
"loss": 1.5627, |
|
"step": 18879 |
|
}, |
|
{ |
|
"epoch": 0.47868267338324355, |
|
"grad_norm": 1.4533812999725342, |
|
"learning_rate": 1.7301891311300153e-05, |
|
"loss": 1.5582, |
|
"step": 18910 |
|
}, |
|
{ |
|
"epoch": 0.47946739907731445, |
|
"grad_norm": 1.4233548641204834, |
|
"learning_rate": 1.7222033113489055e-05, |
|
"loss": 1.5829, |
|
"step": 18941 |
|
}, |
|
{ |
|
"epoch": 0.48025212477138535, |
|
"grad_norm": 1.4943761825561523, |
|
"learning_rate": 1.7142262688977127e-05, |
|
"loss": 1.563, |
|
"step": 18972 |
|
}, |
|
{ |
|
"epoch": 0.48103685046545624, |
|
"grad_norm": 1.4122124910354614, |
|
"learning_rate": 1.7062580937962764e-05, |
|
"loss": 1.5723, |
|
"step": 19003 |
|
}, |
|
{ |
|
"epoch": 0.48182157615952714, |
|
"grad_norm": 1.3874859809875488, |
|
"learning_rate": 1.698298875964369e-05, |
|
"loss": 1.5606, |
|
"step": 19034 |
|
}, |
|
{ |
|
"epoch": 0.48260630185359804, |
|
"grad_norm": 1.3442684412002563, |
|
"learning_rate": 1.690348705220684e-05, |
|
"loss": 1.5794, |
|
"step": 19065 |
|
}, |
|
{ |
|
"epoch": 0.48339102754766894, |
|
"grad_norm": 1.5870423316955566, |
|
"learning_rate": 1.6824076712818156e-05, |
|
"loss": 1.5782, |
|
"step": 19096 |
|
}, |
|
{ |
|
"epoch": 0.48417575324173984, |
|
"grad_norm": 1.3558776378631592, |
|
"learning_rate": 1.6744758637612533e-05, |
|
"loss": 1.5642, |
|
"step": 19127 |
|
}, |
|
{ |
|
"epoch": 0.4849604789358107, |
|
"grad_norm": 1.4363101720809937, |
|
"learning_rate": 1.6665533721683664e-05, |
|
"loss": 1.5698, |
|
"step": 19158 |
|
}, |
|
{ |
|
"epoch": 0.4857452046298816, |
|
"grad_norm": 1.423425555229187, |
|
"learning_rate": 1.6586402859073974e-05, |
|
"loss": 1.5712, |
|
"step": 19189 |
|
}, |
|
{ |
|
"epoch": 0.4865299303239525, |
|
"grad_norm": 1.3792959451675415, |
|
"learning_rate": 1.6507366942764463e-05, |
|
"loss": 1.567, |
|
"step": 19220 |
|
}, |
|
{ |
|
"epoch": 0.4873146560180234, |
|
"grad_norm": 1.4269790649414062, |
|
"learning_rate": 1.6428426864664732e-05, |
|
"loss": 1.5616, |
|
"step": 19251 |
|
}, |
|
{ |
|
"epoch": 0.48809938171209427, |
|
"grad_norm": 1.4407951831817627, |
|
"learning_rate": 1.6349583515602816e-05, |
|
"loss": 1.5786, |
|
"step": 19282 |
|
}, |
|
{ |
|
"epoch": 0.48888410740616517, |
|
"grad_norm": 1.4874082803726196, |
|
"learning_rate": 1.6270837785315208e-05, |
|
"loss": 1.5907, |
|
"step": 19313 |
|
}, |
|
{ |
|
"epoch": 0.48966883310023607, |
|
"grad_norm": 1.382135272026062, |
|
"learning_rate": 1.619219056243676e-05, |
|
"loss": 1.5673, |
|
"step": 19344 |
|
}, |
|
{ |
|
"epoch": 0.49045355879430697, |
|
"grad_norm": 1.3598939180374146, |
|
"learning_rate": 1.6113642734490698e-05, |
|
"loss": 1.5548, |
|
"step": 19375 |
|
}, |
|
{ |
|
"epoch": 0.4912382844883778, |
|
"grad_norm": 1.4186638593673706, |
|
"learning_rate": 1.6035195187878577e-05, |
|
"loss": 1.5834, |
|
"step": 19406 |
|
}, |
|
{ |
|
"epoch": 0.4920230101824487, |
|
"grad_norm": 1.3320554494857788, |
|
"learning_rate": 1.5956848807870305e-05, |
|
"loss": 1.5435, |
|
"step": 19437 |
|
}, |
|
{ |
|
"epoch": 0.4928077358765196, |
|
"grad_norm": 1.3170437812805176, |
|
"learning_rate": 1.587860447859413e-05, |
|
"loss": 1.5538, |
|
"step": 19468 |
|
}, |
|
{ |
|
"epoch": 0.4935924615705905, |
|
"grad_norm": 1.463334321975708, |
|
"learning_rate": 1.5800463083026686e-05, |
|
"loss": 1.5603, |
|
"step": 19499 |
|
}, |
|
{ |
|
"epoch": 0.4943771872646614, |
|
"grad_norm": 1.4043060541152954, |
|
"learning_rate": 1.572242550298298e-05, |
|
"loss": 1.5778, |
|
"step": 19530 |
|
}, |
|
{ |
|
"epoch": 0.4951619129587323, |
|
"grad_norm": 1.3377630710601807, |
|
"learning_rate": 1.56444926191065e-05, |
|
"loss": 1.5836, |
|
"step": 19561 |
|
}, |
|
{ |
|
"epoch": 0.4959466386528032, |
|
"grad_norm": 1.4007608890533447, |
|
"learning_rate": 1.5566665310859257e-05, |
|
"loss": 1.5691, |
|
"step": 19592 |
|
}, |
|
{ |
|
"epoch": 0.4967313643468741, |
|
"grad_norm": 1.3231667280197144, |
|
"learning_rate": 1.5488944456511846e-05, |
|
"loss": 1.5517, |
|
"step": 19623 |
|
}, |
|
{ |
|
"epoch": 0.49751609004094494, |
|
"grad_norm": 1.4343535900115967, |
|
"learning_rate": 1.5411330933133546e-05, |
|
"loss": 1.5753, |
|
"step": 19654 |
|
}, |
|
{ |
|
"epoch": 0.49830081573501583, |
|
"grad_norm": 1.2943058013916016, |
|
"learning_rate": 1.533382561658241e-05, |
|
"loss": 1.5571, |
|
"step": 19685 |
|
}, |
|
{ |
|
"epoch": 0.49908554142908673, |
|
"grad_norm": 1.2815899848937988, |
|
"learning_rate": 1.525642938149541e-05, |
|
"loss": 1.5796, |
|
"step": 19716 |
|
}, |
|
{ |
|
"epoch": 0.49987026712315763, |
|
"grad_norm": 1.4025834798812866, |
|
"learning_rate": 1.5179143101278536e-05, |
|
"loss": 1.5672, |
|
"step": 19747 |
|
}, |
|
{ |
|
"epoch": 0.5006549928172285, |
|
"grad_norm": 1.4670218229293823, |
|
"learning_rate": 1.5101967648096955e-05, |
|
"loss": 1.5702, |
|
"step": 19778 |
|
}, |
|
{ |
|
"epoch": 0.5014397185112994, |
|
"grad_norm": 1.4222999811172485, |
|
"learning_rate": 1.5024903892865172e-05, |
|
"loss": 1.5842, |
|
"step": 19809 |
|
}, |
|
{ |
|
"epoch": 0.5022244442053703, |
|
"grad_norm": 1.4714964628219604, |
|
"learning_rate": 1.4947952705237184e-05, |
|
"loss": 1.5552, |
|
"step": 19840 |
|
}, |
|
{ |
|
"epoch": 0.5030091698994412, |
|
"grad_norm": 1.3124053478240967, |
|
"learning_rate": 1.4871114953596682e-05, |
|
"loss": 1.567, |
|
"step": 19871 |
|
}, |
|
{ |
|
"epoch": 0.5037938955935121, |
|
"grad_norm": 1.343239188194275, |
|
"learning_rate": 1.4794391505047256e-05, |
|
"loss": 1.5829, |
|
"step": 19902 |
|
}, |
|
{ |
|
"epoch": 0.504578621287583, |
|
"grad_norm": 1.4160040616989136, |
|
"learning_rate": 1.4717783225402596e-05, |
|
"loss": 1.5479, |
|
"step": 19933 |
|
}, |
|
{ |
|
"epoch": 0.5053633469816539, |
|
"grad_norm": 1.3658647537231445, |
|
"learning_rate": 1.4641290979176735e-05, |
|
"loss": 1.558, |
|
"step": 19964 |
|
}, |
|
{ |
|
"epoch": 0.5061480726757248, |
|
"grad_norm": 1.2913247346878052, |
|
"learning_rate": 1.4564915629574246e-05, |
|
"loss": 1.5795, |
|
"step": 19995 |
|
}, |
|
{ |
|
"epoch": 0.5069327983697957, |
|
"grad_norm": 1.3975298404693604, |
|
"learning_rate": 1.4488658038480601e-05, |
|
"loss": 1.5557, |
|
"step": 20026 |
|
}, |
|
{ |
|
"epoch": 0.5077175240638665, |
|
"grad_norm": 1.342119812965393, |
|
"learning_rate": 1.4412519066452323e-05, |
|
"loss": 1.5727, |
|
"step": 20057 |
|
}, |
|
{ |
|
"epoch": 0.5085022497579375, |
|
"grad_norm": 1.3325005769729614, |
|
"learning_rate": 1.4336499572707373e-05, |
|
"loss": 1.5573, |
|
"step": 20088 |
|
}, |
|
{ |
|
"epoch": 0.5092869754520083, |
|
"grad_norm": 1.3986520767211914, |
|
"learning_rate": 1.4260600415115433e-05, |
|
"loss": 1.5537, |
|
"step": 20119 |
|
}, |
|
{ |
|
"epoch": 0.5100717011460792, |
|
"grad_norm": 1.3560576438903809, |
|
"learning_rate": 1.4184822450188137e-05, |
|
"loss": 1.5529, |
|
"step": 20150 |
|
}, |
|
{ |
|
"epoch": 0.5108564268401501, |
|
"grad_norm": 1.4381458759307861, |
|
"learning_rate": 1.410916653306954e-05, |
|
"loss": 1.5845, |
|
"step": 20181 |
|
}, |
|
{ |
|
"epoch": 0.511641152534221, |
|
"grad_norm": 1.6817706823349, |
|
"learning_rate": 1.403363351752639e-05, |
|
"loss": 1.569, |
|
"step": 20212 |
|
}, |
|
{ |
|
"epoch": 0.5124258782282919, |
|
"grad_norm": 1.3956488370895386, |
|
"learning_rate": 1.3958224255938485e-05, |
|
"loss": 1.5561, |
|
"step": 20243 |
|
}, |
|
{ |
|
"epoch": 0.5132106039223627, |
|
"grad_norm": 1.3474819660186768, |
|
"learning_rate": 1.388293959928911e-05, |
|
"loss": 1.5608, |
|
"step": 20274 |
|
}, |
|
{ |
|
"epoch": 0.5139953296164337, |
|
"grad_norm": 1.286340594291687, |
|
"learning_rate": 1.3807780397155379e-05, |
|
"loss": 1.5661, |
|
"step": 20305 |
|
}, |
|
{ |
|
"epoch": 0.5147800553105045, |
|
"grad_norm": 1.3667712211608887, |
|
"learning_rate": 1.3732747497698655e-05, |
|
"loss": 1.5778, |
|
"step": 20336 |
|
}, |
|
{ |
|
"epoch": 0.5155647810045755, |
|
"grad_norm": 1.4048058986663818, |
|
"learning_rate": 1.3657841747655038e-05, |
|
"loss": 1.5444, |
|
"step": 20367 |
|
}, |
|
{ |
|
"epoch": 0.5163495066986463, |
|
"grad_norm": 1.5085017681121826, |
|
"learning_rate": 1.3583063992325706e-05, |
|
"loss": 1.5657, |
|
"step": 20398 |
|
}, |
|
{ |
|
"epoch": 0.5171342323927173, |
|
"grad_norm": 1.3968846797943115, |
|
"learning_rate": 1.3508415075567496e-05, |
|
"loss": 1.5641, |
|
"step": 20429 |
|
}, |
|
{ |
|
"epoch": 0.5179189580867881, |
|
"grad_norm": 1.403813123703003, |
|
"learning_rate": 1.343389583978327e-05, |
|
"loss": 1.5768, |
|
"step": 20460 |
|
}, |
|
{ |
|
"epoch": 0.5187036837808591, |
|
"grad_norm": 1.3661153316497803, |
|
"learning_rate": 1.3359507125912468e-05, |
|
"loss": 1.5511, |
|
"step": 20491 |
|
}, |
|
{ |
|
"epoch": 0.5194884094749299, |
|
"grad_norm": 1.4918231964111328, |
|
"learning_rate": 1.3285249773421627e-05, |
|
"loss": 1.5552, |
|
"step": 20522 |
|
}, |
|
{ |
|
"epoch": 0.5202731351690008, |
|
"grad_norm": 1.366255521774292, |
|
"learning_rate": 1.3211124620294884e-05, |
|
"loss": 1.5573, |
|
"step": 20553 |
|
}, |
|
{ |
|
"epoch": 0.5210578608630717, |
|
"grad_norm": 1.360115885734558, |
|
"learning_rate": 1.313713250302451e-05, |
|
"loss": 1.5743, |
|
"step": 20584 |
|
}, |
|
{ |
|
"epoch": 0.5218425865571426, |
|
"grad_norm": 1.396219253540039, |
|
"learning_rate": 1.3063274256601479e-05, |
|
"loss": 1.5313, |
|
"step": 20615 |
|
}, |
|
{ |
|
"epoch": 0.5226273122512135, |
|
"grad_norm": 1.3751533031463623, |
|
"learning_rate": 1.2989550714506086e-05, |
|
"loss": 1.554, |
|
"step": 20646 |
|
}, |
|
{ |
|
"epoch": 0.5234120379452843, |
|
"grad_norm": 1.3931307792663574, |
|
"learning_rate": 1.291596270869846e-05, |
|
"loss": 1.572, |
|
"step": 20677 |
|
}, |
|
{ |
|
"epoch": 0.5241967636393553, |
|
"grad_norm": 1.3172565698623657, |
|
"learning_rate": 1.284251106960927e-05, |
|
"loss": 1.556, |
|
"step": 20708 |
|
}, |
|
{ |
|
"epoch": 0.5249814893334261, |
|
"grad_norm": 1.4660224914550781, |
|
"learning_rate": 1.2769196626130263e-05, |
|
"loss": 1.563, |
|
"step": 20739 |
|
}, |
|
{ |
|
"epoch": 0.5257662150274971, |
|
"grad_norm": 1.3981261253356934, |
|
"learning_rate": 1.2696020205604969e-05, |
|
"loss": 1.536, |
|
"step": 20770 |
|
}, |
|
{ |
|
"epoch": 0.5265509407215679, |
|
"grad_norm": 1.3775140047073364, |
|
"learning_rate": 1.2622982633819359e-05, |
|
"loss": 1.5538, |
|
"step": 20801 |
|
}, |
|
{ |
|
"epoch": 0.5273356664156388, |
|
"grad_norm": 1.3806031942367554, |
|
"learning_rate": 1.2550084734992484e-05, |
|
"loss": 1.5717, |
|
"step": 20832 |
|
}, |
|
{ |
|
"epoch": 0.5281203921097097, |
|
"grad_norm": 1.663273572921753, |
|
"learning_rate": 1.247732733176724e-05, |
|
"loss": 1.5474, |
|
"step": 20863 |
|
}, |
|
{ |
|
"epoch": 0.5289051178037806, |
|
"grad_norm": 1.4349000453948975, |
|
"learning_rate": 1.2404711245201044e-05, |
|
"loss": 1.563, |
|
"step": 20894 |
|
}, |
|
{ |
|
"epoch": 0.5296898434978515, |
|
"grad_norm": 1.4207381010055542, |
|
"learning_rate": 1.2332237294756535e-05, |
|
"loss": 1.5769, |
|
"step": 20925 |
|
}, |
|
{ |
|
"epoch": 0.5304745691919224, |
|
"grad_norm": 1.3234254121780396, |
|
"learning_rate": 1.225990629829241e-05, |
|
"loss": 1.5419, |
|
"step": 20956 |
|
}, |
|
{ |
|
"epoch": 0.5312592948859933, |
|
"grad_norm": 1.3426439762115479, |
|
"learning_rate": 1.2187719072054136e-05, |
|
"loss": 1.5479, |
|
"step": 20987 |
|
}, |
|
{ |
|
"epoch": 0.5320440205800642, |
|
"grad_norm": 1.3690837621688843, |
|
"learning_rate": 1.2115676430664735e-05, |
|
"loss": 1.5668, |
|
"step": 21018 |
|
}, |
|
{ |
|
"epoch": 0.532828746274135, |
|
"grad_norm": 1.4441026449203491, |
|
"learning_rate": 1.2043779187115647e-05, |
|
"loss": 1.5663, |
|
"step": 21049 |
|
}, |
|
{ |
|
"epoch": 0.533613471968206, |
|
"grad_norm": 1.379137396812439, |
|
"learning_rate": 1.1972028152757476e-05, |
|
"loss": 1.5704, |
|
"step": 21080 |
|
}, |
|
{ |
|
"epoch": 0.5343981976622768, |
|
"grad_norm": 1.3750004768371582, |
|
"learning_rate": 1.1900424137290889e-05, |
|
"loss": 1.5518, |
|
"step": 21111 |
|
}, |
|
{ |
|
"epoch": 0.5351829233563478, |
|
"grad_norm": 1.465265154838562, |
|
"learning_rate": 1.1828967948757482e-05, |
|
"loss": 1.5539, |
|
"step": 21142 |
|
}, |
|
{ |
|
"epoch": 0.5359676490504186, |
|
"grad_norm": 1.3172025680541992, |
|
"learning_rate": 1.175766039353062e-05, |
|
"loss": 1.5544, |
|
"step": 21173 |
|
}, |
|
{ |
|
"epoch": 0.5367523747444896, |
|
"grad_norm": 1.4065696001052856, |
|
"learning_rate": 1.1686502276306382e-05, |
|
"loss": 1.5586, |
|
"step": 21204 |
|
}, |
|
{ |
|
"epoch": 0.5375371004385604, |
|
"grad_norm": 1.45732581615448, |
|
"learning_rate": 1.1615494400094445e-05, |
|
"loss": 1.5728, |
|
"step": 21235 |
|
}, |
|
{ |
|
"epoch": 0.5383218261326314, |
|
"grad_norm": 1.3364806175231934, |
|
"learning_rate": 1.1544637566209029e-05, |
|
"loss": 1.5569, |
|
"step": 21266 |
|
}, |
|
{ |
|
"epoch": 0.5391065518267022, |
|
"grad_norm": 1.3799667358398438, |
|
"learning_rate": 1.1473932574259886e-05, |
|
"loss": 1.5344, |
|
"step": 21297 |
|
}, |
|
{ |
|
"epoch": 0.539891277520773, |
|
"grad_norm": 1.4128960371017456, |
|
"learning_rate": 1.1403380222143247e-05, |
|
"loss": 1.5546, |
|
"step": 21328 |
|
}, |
|
{ |
|
"epoch": 0.540676003214844, |
|
"grad_norm": 1.5169612169265747, |
|
"learning_rate": 1.1332981306032808e-05, |
|
"loss": 1.5471, |
|
"step": 21359 |
|
}, |
|
{ |
|
"epoch": 0.5414607289089148, |
|
"grad_norm": 1.4209131002426147, |
|
"learning_rate": 1.1262736620370762e-05, |
|
"loss": 1.5654, |
|
"step": 21390 |
|
}, |
|
{ |
|
"epoch": 0.5422454546029858, |
|
"grad_norm": 1.3103234767913818, |
|
"learning_rate": 1.1192646957858854e-05, |
|
"loss": 1.5492, |
|
"step": 21421 |
|
}, |
|
{ |
|
"epoch": 0.5430301802970566, |
|
"grad_norm": 1.7383350133895874, |
|
"learning_rate": 1.1122713109449381e-05, |
|
"loss": 1.5502, |
|
"step": 21452 |
|
}, |
|
{ |
|
"epoch": 0.5438149059911276, |
|
"grad_norm": 1.3104016780853271, |
|
"learning_rate": 1.105293586433634e-05, |
|
"loss": 1.5564, |
|
"step": 21483 |
|
}, |
|
{ |
|
"epoch": 0.5445996316851984, |
|
"grad_norm": 1.3233284950256348, |
|
"learning_rate": 1.0983316009946446e-05, |
|
"loss": 1.5274, |
|
"step": 21514 |
|
}, |
|
{ |
|
"epoch": 0.5453843573792693, |
|
"grad_norm": 1.4942415952682495, |
|
"learning_rate": 1.0913854331930282e-05, |
|
"loss": 1.5643, |
|
"step": 21545 |
|
}, |
|
{ |
|
"epoch": 0.5461690830733402, |
|
"grad_norm": 1.3964463472366333, |
|
"learning_rate": 1.0844551614153456e-05, |
|
"loss": 1.5575, |
|
"step": 21576 |
|
}, |
|
{ |
|
"epoch": 0.5469538087674111, |
|
"grad_norm": 1.4472683668136597, |
|
"learning_rate": 1.0775408638687725e-05, |
|
"loss": 1.5459, |
|
"step": 21607 |
|
}, |
|
{ |
|
"epoch": 0.547738534461482, |
|
"grad_norm": 1.3240516185760498, |
|
"learning_rate": 1.0706426185802165e-05, |
|
"loss": 1.5703, |
|
"step": 21638 |
|
}, |
|
{ |
|
"epoch": 0.5485232601555529, |
|
"grad_norm": 1.3561683893203735, |
|
"learning_rate": 1.0637605033954371e-05, |
|
"loss": 1.5429, |
|
"step": 21669 |
|
}, |
|
{ |
|
"epoch": 0.5493079858496238, |
|
"grad_norm": 1.3770638704299927, |
|
"learning_rate": 1.05689459597817e-05, |
|
"loss": 1.5575, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.5500927115436947, |
|
"grad_norm": 1.4219211339950562, |
|
"learning_rate": 1.050044973809246e-05, |
|
"loss": 1.5392, |
|
"step": 21731 |
|
}, |
|
{ |
|
"epoch": 0.5508774372377656, |
|
"grad_norm": 1.3968154191970825, |
|
"learning_rate": 1.043211714185722e-05, |
|
"loss": 1.559, |
|
"step": 21762 |
|
}, |
|
{ |
|
"epoch": 0.5516621629318365, |
|
"grad_norm": 1.3730138540267944, |
|
"learning_rate": 1.036394894220003e-05, |
|
"loss": 1.5452, |
|
"step": 21793 |
|
}, |
|
{ |
|
"epoch": 0.5524468886259073, |
|
"grad_norm": 1.407535433769226, |
|
"learning_rate": 1.0295945908389751e-05, |
|
"loss": 1.5477, |
|
"step": 21824 |
|
}, |
|
{ |
|
"epoch": 0.5532316143199782, |
|
"grad_norm": 1.440319299697876, |
|
"learning_rate": 1.0228108807831393e-05, |
|
"loss": 1.5483, |
|
"step": 21855 |
|
}, |
|
{ |
|
"epoch": 0.5540163400140491, |
|
"grad_norm": 1.38417649269104, |
|
"learning_rate": 1.01604384060574e-05, |
|
"loss": 1.569, |
|
"step": 21886 |
|
}, |
|
{ |
|
"epoch": 0.55480106570812, |
|
"grad_norm": 1.51227867603302, |
|
"learning_rate": 1.009293546671907e-05, |
|
"loss": 1.5441, |
|
"step": 21917 |
|
}, |
|
{ |
|
"epoch": 0.5555857914021909, |
|
"grad_norm": 1.3792462348937988, |
|
"learning_rate": 1.002560075157791e-05, |
|
"loss": 1.5537, |
|
"step": 21948 |
|
}, |
|
{ |
|
"epoch": 0.5563705170962618, |
|
"grad_norm": 1.3728954792022705, |
|
"learning_rate": 9.958435020496995e-06, |
|
"loss": 1.5463, |
|
"step": 21979 |
|
}, |
|
{ |
|
"epoch": 0.5571552427903327, |
|
"grad_norm": 1.4337445497512817, |
|
"learning_rate": 9.89143903143249e-06, |
|
"loss": 1.5409, |
|
"step": 22010 |
|
}, |
|
{ |
|
"epoch": 0.5579399684844035, |
|
"grad_norm": 1.317431092262268, |
|
"learning_rate": 9.824613540425038e-06, |
|
"loss": 1.5541, |
|
"step": 22041 |
|
}, |
|
{ |
|
"epoch": 0.5587246941784745, |
|
"grad_norm": 1.3596452474594116, |
|
"learning_rate": 9.757959301591197e-06, |
|
"loss": 1.5465, |
|
"step": 22072 |
|
}, |
|
{ |
|
"epoch": 0.5595094198725453, |
|
"grad_norm": 1.4173970222473145, |
|
"learning_rate": 9.691477067115017e-06, |
|
"loss": 1.5534, |
|
"step": 22103 |
|
}, |
|
{ |
|
"epoch": 0.5602941455666163, |
|
"grad_norm": 2.4860451221466064, |
|
"learning_rate": 9.625167587239467e-06, |
|
"loss": 1.5458, |
|
"step": 22134 |
|
}, |
|
{ |
|
"epoch": 0.5610788712606871, |
|
"grad_norm": 1.440307378768921, |
|
"learning_rate": 9.559031610258007e-06, |
|
"loss": 1.5581, |
|
"step": 22165 |
|
}, |
|
{ |
|
"epoch": 0.5618635969547581, |
|
"grad_norm": 1.5789539813995361, |
|
"learning_rate": 9.493069882506164e-06, |
|
"loss": 1.5589, |
|
"step": 22196 |
|
}, |
|
{ |
|
"epoch": 0.5626483226488289, |
|
"grad_norm": 1.3445873260498047, |
|
"learning_rate": 9.427283148353056e-06, |
|
"loss": 1.5533, |
|
"step": 22227 |
|
}, |
|
{ |
|
"epoch": 0.5634330483428999, |
|
"grad_norm": 1.3744895458221436, |
|
"learning_rate": 9.361672150193052e-06, |
|
"loss": 1.5497, |
|
"step": 22258 |
|
}, |
|
{ |
|
"epoch": 0.5642177740369707, |
|
"grad_norm": 1.4480764865875244, |
|
"learning_rate": 9.29623762843734e-06, |
|
"loss": 1.5521, |
|
"step": 22289 |
|
}, |
|
{ |
|
"epoch": 0.5650024997310416, |
|
"grad_norm": 1.3482125997543335, |
|
"learning_rate": 9.230980321505594e-06, |
|
"loss": 1.5514, |
|
"step": 22320 |
|
}, |
|
{ |
|
"epoch": 0.5657872254251125, |
|
"grad_norm": 1.4724624156951904, |
|
"learning_rate": 9.165900965817668e-06, |
|
"loss": 1.558, |
|
"step": 22351 |
|
}, |
|
{ |
|
"epoch": 0.5665719511191833, |
|
"grad_norm": 1.4756817817687988, |
|
"learning_rate": 9.101000295785245e-06, |
|
"loss": 1.5519, |
|
"step": 22382 |
|
}, |
|
{ |
|
"epoch": 0.5673566768132543, |
|
"grad_norm": 1.4908230304718018, |
|
"learning_rate": 9.036279043803565e-06, |
|
"loss": 1.5649, |
|
"step": 22413 |
|
}, |
|
{ |
|
"epoch": 0.5681414025073251, |
|
"grad_norm": 1.2823692560195923, |
|
"learning_rate": 8.971737940243147e-06, |
|
"loss": 1.5561, |
|
"step": 22444 |
|
}, |
|
{ |
|
"epoch": 0.5689261282013961, |
|
"grad_norm": 1.3445894718170166, |
|
"learning_rate": 8.907377713441592e-06, |
|
"loss": 1.5296, |
|
"step": 22475 |
|
}, |
|
{ |
|
"epoch": 0.5697108538954669, |
|
"grad_norm": 1.3359887599945068, |
|
"learning_rate": 8.843199089695293e-06, |
|
"loss": 1.5299, |
|
"step": 22506 |
|
}, |
|
{ |
|
"epoch": 0.5704955795895378, |
|
"grad_norm": 1.4024282693862915, |
|
"learning_rate": 8.779202793251311e-06, |
|
"loss": 1.555, |
|
"step": 22537 |
|
}, |
|
{ |
|
"epoch": 0.5712803052836087, |
|
"grad_norm": 1.402908444404602, |
|
"learning_rate": 8.715389546299149e-06, |
|
"loss": 1.5442, |
|
"step": 22568 |
|
}, |
|
{ |
|
"epoch": 0.5720650309776796, |
|
"grad_norm": 1.3054429292678833, |
|
"learning_rate": 8.651760068962617e-06, |
|
"loss": 1.5491, |
|
"step": 22599 |
|
}, |
|
{ |
|
"epoch": 0.5728497566717505, |
|
"grad_norm": 1.314642071723938, |
|
"learning_rate": 8.588315079291733e-06, |
|
"loss": 1.531, |
|
"step": 22630 |
|
}, |
|
{ |
|
"epoch": 0.5736344823658214, |
|
"grad_norm": 1.2906594276428223, |
|
"learning_rate": 8.52505529325457e-06, |
|
"loss": 1.525, |
|
"step": 22661 |
|
}, |
|
{ |
|
"epoch": 0.5744192080598923, |
|
"grad_norm": 1.391607403755188, |
|
"learning_rate": 8.461981424729216e-06, |
|
"loss": 1.5578, |
|
"step": 22692 |
|
}, |
|
{ |
|
"epoch": 0.5752039337539632, |
|
"grad_norm": 1.5275055170059204, |
|
"learning_rate": 8.399094185495725e-06, |
|
"loss": 1.5468, |
|
"step": 22723 |
|
}, |
|
{ |
|
"epoch": 0.5759886594480341, |
|
"grad_norm": 1.4094804525375366, |
|
"learning_rate": 8.336394285228017e-06, |
|
"loss": 1.5336, |
|
"step": 22754 |
|
}, |
|
{ |
|
"epoch": 0.576773385142105, |
|
"grad_norm": 1.4096417427062988, |
|
"learning_rate": 8.273882431485952e-06, |
|
"loss": 1.5386, |
|
"step": 22785 |
|
}, |
|
{ |
|
"epoch": 0.5775581108361758, |
|
"grad_norm": 1.4015659093856812, |
|
"learning_rate": 8.211559329707316e-06, |
|
"loss": 1.5514, |
|
"step": 22816 |
|
}, |
|
{ |
|
"epoch": 0.5783428365302468, |
|
"grad_norm": 1.4353171586990356, |
|
"learning_rate": 8.149425683199823e-06, |
|
"loss": 1.5432, |
|
"step": 22847 |
|
}, |
|
{ |
|
"epoch": 0.5791275622243176, |
|
"grad_norm": 1.3493109941482544, |
|
"learning_rate": 8.08748219313325e-06, |
|
"loss": 1.5387, |
|
"step": 22878 |
|
}, |
|
{ |
|
"epoch": 0.5799122879183886, |
|
"grad_norm": 1.376868486404419, |
|
"learning_rate": 8.025729558531453e-06, |
|
"loss": 1.5397, |
|
"step": 22909 |
|
}, |
|
{ |
|
"epoch": 0.5806970136124594, |
|
"grad_norm": 1.4415427446365356, |
|
"learning_rate": 7.964168476264508e-06, |
|
"loss": 1.5556, |
|
"step": 22940 |
|
}, |
|
{ |
|
"epoch": 0.5814817393065304, |
|
"grad_norm": 1.4281046390533447, |
|
"learning_rate": 7.902799641040884e-06, |
|
"loss": 1.5312, |
|
"step": 22971 |
|
}, |
|
{ |
|
"epoch": 0.5822664650006012, |
|
"grad_norm": 1.372336983680725, |
|
"learning_rate": 7.841623745399523e-06, |
|
"loss": 1.5437, |
|
"step": 23002 |
|
}, |
|
{ |
|
"epoch": 0.583051190694672, |
|
"grad_norm": 1.3720817565917969, |
|
"learning_rate": 7.780641479702114e-06, |
|
"loss": 1.5599, |
|
"step": 23033 |
|
}, |
|
{ |
|
"epoch": 0.583835916388743, |
|
"grad_norm": 1.3714765310287476, |
|
"learning_rate": 7.719853532125227e-06, |
|
"loss": 1.5256, |
|
"step": 23064 |
|
}, |
|
{ |
|
"epoch": 0.5846206420828138, |
|
"grad_norm": 1.3198277950286865, |
|
"learning_rate": 7.65926058865258e-06, |
|
"loss": 1.5609, |
|
"step": 23095 |
|
}, |
|
{ |
|
"epoch": 0.5854053677768848, |
|
"grad_norm": 1.3970394134521484, |
|
"learning_rate": 7.598863333067313e-06, |
|
"loss": 1.552, |
|
"step": 23126 |
|
}, |
|
{ |
|
"epoch": 0.5861900934709556, |
|
"grad_norm": 1.3451225757598877, |
|
"learning_rate": 7.538662446944253e-06, |
|
"loss": 1.5407, |
|
"step": 23157 |
|
}, |
|
{ |
|
"epoch": 0.5869748191650266, |
|
"grad_norm": 1.3626407384872437, |
|
"learning_rate": 7.478658609642211e-06, |
|
"loss": 1.528, |
|
"step": 23188 |
|
}, |
|
{ |
|
"epoch": 0.5877595448590974, |
|
"grad_norm": 1.295155644416809, |
|
"learning_rate": 7.418852498296327e-06, |
|
"loss": 1.5396, |
|
"step": 23219 |
|
}, |
|
{ |
|
"epoch": 0.5885442705531684, |
|
"grad_norm": 1.4162577390670776, |
|
"learning_rate": 7.359244787810457e-06, |
|
"loss": 1.5442, |
|
"step": 23250 |
|
}, |
|
{ |
|
"epoch": 0.5893289962472392, |
|
"grad_norm": 1.4795522689819336, |
|
"learning_rate": 7.299836150849493e-06, |
|
"loss": 1.5724, |
|
"step": 23281 |
|
}, |
|
{ |
|
"epoch": 0.5901137219413101, |
|
"grad_norm": 1.4080073833465576, |
|
"learning_rate": 7.240627257831847e-06, |
|
"loss": 1.5673, |
|
"step": 23312 |
|
}, |
|
{ |
|
"epoch": 0.590898447635381, |
|
"grad_norm": 1.2865021228790283, |
|
"learning_rate": 7.1816187769218195e-06, |
|
"loss": 1.5529, |
|
"step": 23343 |
|
}, |
|
{ |
|
"epoch": 0.5916831733294519, |
|
"grad_norm": 2.568460464477539, |
|
"learning_rate": 7.1228113740220895e-06, |
|
"loss": 1.5379, |
|
"step": 23374 |
|
}, |
|
{ |
|
"epoch": 0.5924678990235228, |
|
"grad_norm": 1.4487184286117554, |
|
"learning_rate": 7.064205712766226e-06, |
|
"loss": 1.5417, |
|
"step": 23405 |
|
}, |
|
{ |
|
"epoch": 0.5932526247175937, |
|
"grad_norm": 1.3384840488433838, |
|
"learning_rate": 7.005802454511129e-06, |
|
"loss": 1.5481, |
|
"step": 23436 |
|
}, |
|
{ |
|
"epoch": 0.5940373504116646, |
|
"grad_norm": 1.3432554006576538, |
|
"learning_rate": 6.947602258329639e-06, |
|
"loss": 1.521, |
|
"step": 23467 |
|
}, |
|
{ |
|
"epoch": 0.5948220761057355, |
|
"grad_norm": 1.3277153968811035, |
|
"learning_rate": 6.889605781003078e-06, |
|
"loss": 1.5348, |
|
"step": 23498 |
|
}, |
|
{ |
|
"epoch": 0.5956068017998063, |
|
"grad_norm": 1.4018425941467285, |
|
"learning_rate": 6.831813677013776e-06, |
|
"loss": 1.5319, |
|
"step": 23529 |
|
}, |
|
{ |
|
"epoch": 0.5963915274938772, |
|
"grad_norm": 1.44899582862854, |
|
"learning_rate": 6.774226598537792e-06, |
|
"loss": 1.5624, |
|
"step": 23560 |
|
}, |
|
{ |
|
"epoch": 0.5971762531879481, |
|
"grad_norm": 1.4060876369476318, |
|
"learning_rate": 6.716845195437482e-06, |
|
"loss": 1.5487, |
|
"step": 23591 |
|
}, |
|
{ |
|
"epoch": 0.597960978882019, |
|
"grad_norm": 1.4121522903442383, |
|
"learning_rate": 6.659670115254168e-06, |
|
"loss": 1.5332, |
|
"step": 23622 |
|
}, |
|
{ |
|
"epoch": 0.5987457045760899, |
|
"grad_norm": 1.3269188404083252, |
|
"learning_rate": 6.602702003200872e-06, |
|
"loss": 1.5276, |
|
"step": 23653 |
|
}, |
|
{ |
|
"epoch": 0.5995304302701608, |
|
"grad_norm": 1.3662550449371338, |
|
"learning_rate": 6.545941502154992e-06, |
|
"loss": 1.5629, |
|
"step": 23684 |
|
}, |
|
{ |
|
"epoch": 0.6003151559642317, |
|
"grad_norm": 1.4438221454620361, |
|
"learning_rate": 6.489389252651057e-06, |
|
"loss": 1.5496, |
|
"step": 23715 |
|
}, |
|
{ |
|
"epoch": 0.6010998816583026, |
|
"grad_norm": 1.422269344329834, |
|
"learning_rate": 6.4330458928735325e-06, |
|
"loss": 1.533, |
|
"step": 23746 |
|
}, |
|
{ |
|
"epoch": 0.6018846073523735, |
|
"grad_norm": 1.3922473192214966, |
|
"learning_rate": 6.376912058649559e-06, |
|
"loss": 1.5198, |
|
"step": 23777 |
|
}, |
|
{ |
|
"epoch": 0.6026693330464443, |
|
"grad_norm": 1.4476711750030518, |
|
"learning_rate": 6.320988383441845e-06, |
|
"loss": 1.55, |
|
"step": 23808 |
|
}, |
|
{ |
|
"epoch": 0.6034540587405153, |
|
"grad_norm": 1.3881078958511353, |
|
"learning_rate": 6.265275498341452e-06, |
|
"loss": 1.524, |
|
"step": 23839 |
|
}, |
|
{ |
|
"epoch": 0.6042387844345861, |
|
"grad_norm": 1.4356231689453125, |
|
"learning_rate": 6.209774032060714e-06, |
|
"loss": 1.5334, |
|
"step": 23870 |
|
}, |
|
{ |
|
"epoch": 0.6050235101286571, |
|
"grad_norm": 1.34247624874115, |
|
"learning_rate": 6.1544846109261365e-06, |
|
"loss": 1.5309, |
|
"step": 23901 |
|
}, |
|
{ |
|
"epoch": 0.6058082358227279, |
|
"grad_norm": 1.3616281747817993, |
|
"learning_rate": 6.099407858871342e-06, |
|
"loss": 1.5202, |
|
"step": 23932 |
|
}, |
|
{ |
|
"epoch": 0.6065929615167989, |
|
"grad_norm": 1.4779770374298096, |
|
"learning_rate": 6.044544397429958e-06, |
|
"loss": 1.5266, |
|
"step": 23963 |
|
}, |
|
{ |
|
"epoch": 0.6073776872108697, |
|
"grad_norm": 1.3740448951721191, |
|
"learning_rate": 5.989894845728708e-06, |
|
"loss": 1.5251, |
|
"step": 23994 |
|
}, |
|
{ |
|
"epoch": 0.6081624129049406, |
|
"grad_norm": 1.3835887908935547, |
|
"learning_rate": 5.9354598204803605e-06, |
|
"loss": 1.5349, |
|
"step": 24025 |
|
}, |
|
{ |
|
"epoch": 0.6089471385990115, |
|
"grad_norm": 1.419488549232483, |
|
"learning_rate": 5.881239935976762e-06, |
|
"loss": 1.5236, |
|
"step": 24056 |
|
}, |
|
{ |
|
"epoch": 0.6097318642930823, |
|
"grad_norm": 1.3918389081954956, |
|
"learning_rate": 5.827235804081954e-06, |
|
"loss": 1.5534, |
|
"step": 24087 |
|
}, |
|
{ |
|
"epoch": 0.6105165899871533, |
|
"grad_norm": 1.4750800132751465, |
|
"learning_rate": 5.773448034225221e-06, |
|
"loss": 1.5322, |
|
"step": 24118 |
|
}, |
|
{ |
|
"epoch": 0.6113013156812241, |
|
"grad_norm": 1.4278340339660645, |
|
"learning_rate": 5.719877233394228e-06, |
|
"loss": 1.5626, |
|
"step": 24149 |
|
}, |
|
{ |
|
"epoch": 0.6120860413752951, |
|
"grad_norm": 1.43100106716156, |
|
"learning_rate": 5.666524006128191e-06, |
|
"loss": 1.5411, |
|
"step": 24180 |
|
}, |
|
{ |
|
"epoch": 0.6128707670693659, |
|
"grad_norm": 1.397022008895874, |
|
"learning_rate": 5.613388954511015e-06, |
|
"loss": 1.5233, |
|
"step": 24211 |
|
}, |
|
{ |
|
"epoch": 0.6136554927634369, |
|
"grad_norm": 1.2984530925750732, |
|
"learning_rate": 5.560472678164552e-06, |
|
"loss": 1.5487, |
|
"step": 24242 |
|
}, |
|
{ |
|
"epoch": 0.6144402184575077, |
|
"grad_norm": 1.318934679031372, |
|
"learning_rate": 5.507775774241775e-06, |
|
"loss": 1.5627, |
|
"step": 24273 |
|
}, |
|
{ |
|
"epoch": 0.6152249441515786, |
|
"grad_norm": 1.4760456085205078, |
|
"learning_rate": 5.4552988374200945e-06, |
|
"loss": 1.5222, |
|
"step": 24304 |
|
}, |
|
{ |
|
"epoch": 0.6160096698456495, |
|
"grad_norm": 1.350392460823059, |
|
"learning_rate": 5.403042459894597e-06, |
|
"loss": 1.535, |
|
"step": 24335 |
|
}, |
|
{ |
|
"epoch": 0.6167943955397204, |
|
"grad_norm": 1.3857702016830444, |
|
"learning_rate": 5.3510072313714135e-06, |
|
"loss": 1.5483, |
|
"step": 24366 |
|
}, |
|
{ |
|
"epoch": 0.6175791212337913, |
|
"grad_norm": 1.4854798316955566, |
|
"learning_rate": 5.2991937390610205e-06, |
|
"loss": 1.5381, |
|
"step": 24397 |
|
}, |
|
{ |
|
"epoch": 0.6183638469278622, |
|
"grad_norm": 1.3600910902023315, |
|
"learning_rate": 5.247602567671625e-06, |
|
"loss": 1.5277, |
|
"step": 24428 |
|
}, |
|
{ |
|
"epoch": 0.6191485726219331, |
|
"grad_norm": 1.3631632328033447, |
|
"learning_rate": 5.196234299402603e-06, |
|
"loss": 1.5583, |
|
"step": 24459 |
|
}, |
|
{ |
|
"epoch": 0.619933298316004, |
|
"grad_norm": 1.4225085973739624, |
|
"learning_rate": 5.145089513937865e-06, |
|
"loss": 1.5346, |
|
"step": 24490 |
|
}, |
|
{ |
|
"epoch": 0.6207180240100749, |
|
"grad_norm": 1.3548002243041992, |
|
"learning_rate": 5.094168788439369e-06, |
|
"loss": 1.546, |
|
"step": 24521 |
|
}, |
|
{ |
|
"epoch": 0.6215027497041458, |
|
"grad_norm": 1.4630082845687866, |
|
"learning_rate": 5.043472697540594e-06, |
|
"loss": 1.549, |
|
"step": 24552 |
|
}, |
|
{ |
|
"epoch": 0.6222874753982166, |
|
"grad_norm": 1.4638261795043945, |
|
"learning_rate": 4.993001813340012e-06, |
|
"loss": 1.5224, |
|
"step": 24583 |
|
}, |
|
{ |
|
"epoch": 0.6230722010922876, |
|
"grad_norm": 1.3274465799331665, |
|
"learning_rate": 4.942756705394702e-06, |
|
"loss": 1.538, |
|
"step": 24614 |
|
}, |
|
{ |
|
"epoch": 0.6238569267863584, |
|
"grad_norm": 1.4302935600280762, |
|
"learning_rate": 4.892737940713884e-06, |
|
"loss": 1.545, |
|
"step": 24645 |
|
}, |
|
{ |
|
"epoch": 0.6246416524804294, |
|
"grad_norm": 1.4292621612548828, |
|
"learning_rate": 4.842946083752511e-06, |
|
"loss": 1.5275, |
|
"step": 24676 |
|
}, |
|
{ |
|
"epoch": 0.6254263781745002, |
|
"grad_norm": 1.3631361722946167, |
|
"learning_rate": 4.79338169640493e-06, |
|
"loss": 1.5552, |
|
"step": 24707 |
|
}, |
|
{ |
|
"epoch": 0.6262111038685712, |
|
"grad_norm": 1.4284039735794067, |
|
"learning_rate": 4.74404533799851e-06, |
|
"loss": 1.5298, |
|
"step": 24738 |
|
}, |
|
{ |
|
"epoch": 0.626995829562642, |
|
"grad_norm": 1.4611119031906128, |
|
"learning_rate": 4.694937565287344e-06, |
|
"loss": 1.5414, |
|
"step": 24769 |
|
}, |
|
{ |
|
"epoch": 0.6277805552567128, |
|
"grad_norm": 1.37677800655365, |
|
"learning_rate": 4.646058932445985e-06, |
|
"loss": 1.5392, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.6285652809507838, |
|
"grad_norm": 1.4582575559616089, |
|
"learning_rate": 4.597409991063148e-06, |
|
"loss": 1.5317, |
|
"step": 24831 |
|
}, |
|
{ |
|
"epoch": 0.6293500066448546, |
|
"grad_norm": 1.3665950298309326, |
|
"learning_rate": 4.5489912901355375e-06, |
|
"loss": 1.5514, |
|
"step": 24862 |
|
}, |
|
{ |
|
"epoch": 0.6301347323389256, |
|
"grad_norm": 1.3817001581192017, |
|
"learning_rate": 4.500803376061608e-06, |
|
"loss": 1.5343, |
|
"step": 24893 |
|
}, |
|
{ |
|
"epoch": 0.6309194580329964, |
|
"grad_norm": 1.4217463731765747, |
|
"learning_rate": 4.45284679263541e-06, |
|
"loss": 1.5247, |
|
"step": 24924 |
|
}, |
|
{ |
|
"epoch": 0.6317041837270674, |
|
"grad_norm": 1.3985430002212524, |
|
"learning_rate": 4.4051220810404775e-06, |
|
"loss": 1.5348, |
|
"step": 24955 |
|
}, |
|
{ |
|
"epoch": 0.6324889094211382, |
|
"grad_norm": 1.4616161584854126, |
|
"learning_rate": 4.3576297798437025e-06, |
|
"loss": 1.5563, |
|
"step": 24986 |
|
}, |
|
{ |
|
"epoch": 0.6332736351152092, |
|
"grad_norm": 1.3955610990524292, |
|
"learning_rate": 4.3103704249892436e-06, |
|
"loss": 1.5204, |
|
"step": 25017 |
|
}, |
|
{ |
|
"epoch": 0.63405836080928, |
|
"grad_norm": 1.3720837831497192, |
|
"learning_rate": 4.263344549792487e-06, |
|
"loss": 1.5379, |
|
"step": 25048 |
|
}, |
|
{ |
|
"epoch": 0.6348430865033509, |
|
"grad_norm": 1.347891092300415, |
|
"learning_rate": 4.216552684934056e-06, |
|
"loss": 1.5285, |
|
"step": 25079 |
|
}, |
|
{ |
|
"epoch": 0.6356278121974218, |
|
"grad_norm": 1.5957375764846802, |
|
"learning_rate": 4.169995358453777e-06, |
|
"loss": 1.5163, |
|
"step": 25110 |
|
}, |
|
{ |
|
"epoch": 0.6364125378914927, |
|
"grad_norm": 1.3431944847106934, |
|
"learning_rate": 4.123673095744757e-06, |
|
"loss": 1.5378, |
|
"step": 25141 |
|
}, |
|
{ |
|
"epoch": 0.6371972635855636, |
|
"grad_norm": 1.4405794143676758, |
|
"learning_rate": 4.077586419547435e-06, |
|
"loss": 1.5563, |
|
"step": 25172 |
|
}, |
|
{ |
|
"epoch": 0.6379819892796345, |
|
"grad_norm": 1.3969746828079224, |
|
"learning_rate": 4.03173584994368e-06, |
|
"loss": 1.5441, |
|
"step": 25203 |
|
}, |
|
{ |
|
"epoch": 0.6387667149737054, |
|
"grad_norm": 1.542013168334961, |
|
"learning_rate": 3.986121904350948e-06, |
|
"loss": 1.5249, |
|
"step": 25234 |
|
}, |
|
{ |
|
"epoch": 0.6395514406677762, |
|
"grad_norm": 1.4267256259918213, |
|
"learning_rate": 3.940745097516407e-06, |
|
"loss": 1.5184, |
|
"step": 25265 |
|
}, |
|
{ |
|
"epoch": 0.6403361663618471, |
|
"grad_norm": 1.331272840499878, |
|
"learning_rate": 3.89560594151116e-06, |
|
"loss": 1.5437, |
|
"step": 25296 |
|
}, |
|
{ |
|
"epoch": 0.641120892055918, |
|
"grad_norm": 1.368691086769104, |
|
"learning_rate": 3.850704945724456e-06, |
|
"loss": 1.5265, |
|
"step": 25327 |
|
}, |
|
{ |
|
"epoch": 0.6419056177499889, |
|
"grad_norm": 1.3770484924316406, |
|
"learning_rate": 3.8060426168579077e-06, |
|
"loss": 1.5291, |
|
"step": 25358 |
|
}, |
|
{ |
|
"epoch": 0.6426903434440598, |
|
"grad_norm": 1.4727221727371216, |
|
"learning_rate": 3.7616194589198407e-06, |
|
"loss": 1.5326, |
|
"step": 25389 |
|
}, |
|
{ |
|
"epoch": 0.6434750691381307, |
|
"grad_norm": 1.3571360111236572, |
|
"learning_rate": 3.7174359732195574e-06, |
|
"loss": 1.5278, |
|
"step": 25420 |
|
}, |
|
{ |
|
"epoch": 0.6442597948322016, |
|
"grad_norm": 1.4054335355758667, |
|
"learning_rate": 3.673492658361677e-06, |
|
"loss": 1.5405, |
|
"step": 25451 |
|
}, |
|
{ |
|
"epoch": 0.6450445205262725, |
|
"grad_norm": 1.4510763883590698, |
|
"learning_rate": 3.6297900102405467e-06, |
|
"loss": 1.5409, |
|
"step": 25482 |
|
}, |
|
{ |
|
"epoch": 0.6458292462203434, |
|
"grad_norm": 1.5653456449508667, |
|
"learning_rate": 3.586328522034607e-06, |
|
"loss": 1.5224, |
|
"step": 25513 |
|
}, |
|
{ |
|
"epoch": 0.6466139719144143, |
|
"grad_norm": 1.4818406105041504, |
|
"learning_rate": 3.543108684200838e-06, |
|
"loss": 1.5251, |
|
"step": 25544 |
|
}, |
|
{ |
|
"epoch": 0.6473986976084851, |
|
"grad_norm": 1.4254684448242188, |
|
"learning_rate": 3.5001309844692464e-06, |
|
"loss": 1.5219, |
|
"step": 25575 |
|
}, |
|
{ |
|
"epoch": 0.6481834233025561, |
|
"grad_norm": 1.348809838294983, |
|
"learning_rate": 3.4573959078373215e-06, |
|
"loss": 1.5285, |
|
"step": 25606 |
|
}, |
|
{ |
|
"epoch": 0.6489681489966269, |
|
"grad_norm": 1.4553576707839966, |
|
"learning_rate": 3.4149039365646063e-06, |
|
"loss": 1.5419, |
|
"step": 25637 |
|
}, |
|
{ |
|
"epoch": 0.6497528746906979, |
|
"grad_norm": 1.412490963935852, |
|
"learning_rate": 3.3726555501672143e-06, |
|
"loss": 1.5186, |
|
"step": 25668 |
|
}, |
|
{ |
|
"epoch": 0.6505376003847687, |
|
"grad_norm": 1.4104843139648438, |
|
"learning_rate": 3.33065122541244e-06, |
|
"loss": 1.5254, |
|
"step": 25699 |
|
}, |
|
{ |
|
"epoch": 0.6513223260788397, |
|
"grad_norm": 1.3806548118591309, |
|
"learning_rate": 3.288891436313385e-06, |
|
"loss": 1.5272, |
|
"step": 25730 |
|
}, |
|
{ |
|
"epoch": 0.6521070517729105, |
|
"grad_norm": 1.4207285642623901, |
|
"learning_rate": 3.2473766541235963e-06, |
|
"loss": 1.536, |
|
"step": 25761 |
|
}, |
|
{ |
|
"epoch": 0.6528917774669813, |
|
"grad_norm": 1.3559178113937378, |
|
"learning_rate": 3.2061073473317466e-06, |
|
"loss": 1.5394, |
|
"step": 25792 |
|
}, |
|
{ |
|
"epoch": 0.6536765031610523, |
|
"grad_norm": 1.3517690896987915, |
|
"learning_rate": 3.1650839816563444e-06, |
|
"loss": 1.5488, |
|
"step": 25823 |
|
}, |
|
{ |
|
"epoch": 0.6544612288551231, |
|
"grad_norm": 1.3978461027145386, |
|
"learning_rate": 3.1243070200405093e-06, |
|
"loss": 1.5261, |
|
"step": 25854 |
|
}, |
|
{ |
|
"epoch": 0.6552459545491941, |
|
"grad_norm": 1.3550540208816528, |
|
"learning_rate": 3.0837769226467e-06, |
|
"loss": 1.5254, |
|
"step": 25885 |
|
}, |
|
{ |
|
"epoch": 0.6560306802432649, |
|
"grad_norm": 1.3790268898010254, |
|
"learning_rate": 3.0434941468515666e-06, |
|
"loss": 1.5224, |
|
"step": 25916 |
|
}, |
|
{ |
|
"epoch": 0.6568154059373359, |
|
"grad_norm": 1.3558413982391357, |
|
"learning_rate": 3.003459147240753e-06, |
|
"loss": 1.5179, |
|
"step": 25947 |
|
}, |
|
{ |
|
"epoch": 0.6576001316314067, |
|
"grad_norm": 1.3683024644851685, |
|
"learning_rate": 2.9636723756037875e-06, |
|
"loss": 1.5191, |
|
"step": 25978 |
|
}, |
|
{ |
|
"epoch": 0.6583848573254777, |
|
"grad_norm": 1.4349849224090576, |
|
"learning_rate": 2.9241342809289833e-06, |
|
"loss": 1.5417, |
|
"step": 26009 |
|
}, |
|
{ |
|
"epoch": 0.6591695830195485, |
|
"grad_norm": 1.3950988054275513, |
|
"learning_rate": 2.8848453093983594e-06, |
|
"loss": 1.5267, |
|
"step": 26040 |
|
}, |
|
{ |
|
"epoch": 0.6599543087136194, |
|
"grad_norm": 1.3628458976745605, |
|
"learning_rate": 2.8458059043826257e-06, |
|
"loss": 1.5294, |
|
"step": 26071 |
|
}, |
|
{ |
|
"epoch": 0.6607390344076903, |
|
"grad_norm": 1.3483256101608276, |
|
"learning_rate": 2.807016506436172e-06, |
|
"loss": 1.5498, |
|
"step": 26102 |
|
}, |
|
{ |
|
"epoch": 0.6615237601017612, |
|
"grad_norm": 1.3618528842926025, |
|
"learning_rate": 2.7684775532920566e-06, |
|
"loss": 1.5271, |
|
"step": 26133 |
|
}, |
|
{ |
|
"epoch": 0.6623084857958321, |
|
"grad_norm": 1.49851393699646, |
|
"learning_rate": 2.7301894798571425e-06, |
|
"loss": 1.526, |
|
"step": 26164 |
|
}, |
|
{ |
|
"epoch": 0.663093211489903, |
|
"grad_norm": 1.5132079124450684, |
|
"learning_rate": 2.6921527182071386e-06, |
|
"loss": 1.5418, |
|
"step": 26195 |
|
}, |
|
{ |
|
"epoch": 0.6638779371839739, |
|
"grad_norm": 1.4265996217727661, |
|
"learning_rate": 2.654367697581725e-06, |
|
"loss": 1.5455, |
|
"step": 26226 |
|
}, |
|
{ |
|
"epoch": 0.6646626628780448, |
|
"grad_norm": 1.506589412689209, |
|
"learning_rate": 2.6168348443797175e-06, |
|
"loss": 1.5209, |
|
"step": 26257 |
|
}, |
|
{ |
|
"epoch": 0.6654473885721156, |
|
"grad_norm": 1.3662431240081787, |
|
"learning_rate": 2.5795545821542757e-06, |
|
"loss": 1.5169, |
|
"step": 26288 |
|
}, |
|
{ |
|
"epoch": 0.6662321142661866, |
|
"grad_norm": 1.4398752450942993, |
|
"learning_rate": 2.54252733160808e-06, |
|
"loss": 1.5491, |
|
"step": 26319 |
|
}, |
|
{ |
|
"epoch": 0.6670168399602574, |
|
"grad_norm": 1.4776362180709839, |
|
"learning_rate": 2.5057535105886294e-06, |
|
"loss": 1.5192, |
|
"step": 26350 |
|
}, |
|
{ |
|
"epoch": 0.6678015656543284, |
|
"grad_norm": 1.3796826601028442, |
|
"learning_rate": 2.4692335340834953e-06, |
|
"loss": 1.5245, |
|
"step": 26381 |
|
}, |
|
{ |
|
"epoch": 0.6685862913483992, |
|
"grad_norm": 1.3923054933547974, |
|
"learning_rate": 2.432967814215639e-06, |
|
"loss": 1.5252, |
|
"step": 26412 |
|
}, |
|
{ |
|
"epoch": 0.6693710170424702, |
|
"grad_norm": 1.3372383117675781, |
|
"learning_rate": 2.396956760238794e-06, |
|
"loss": 1.5227, |
|
"step": 26443 |
|
}, |
|
{ |
|
"epoch": 0.670155742736541, |
|
"grad_norm": 1.3287001848220825, |
|
"learning_rate": 2.361200778532796e-06, |
|
"loss": 1.5335, |
|
"step": 26474 |
|
}, |
|
{ |
|
"epoch": 0.670940468430612, |
|
"grad_norm": 1.3403995037078857, |
|
"learning_rate": 2.325700272599049e-06, |
|
"loss": 1.5304, |
|
"step": 26505 |
|
}, |
|
{ |
|
"epoch": 0.6717251941246828, |
|
"grad_norm": 1.3469324111938477, |
|
"learning_rate": 2.2904556430559415e-06, |
|
"loss": 1.5329, |
|
"step": 26536 |
|
}, |
|
{ |
|
"epoch": 0.6725099198187536, |
|
"grad_norm": 1.4993536472320557, |
|
"learning_rate": 2.2554672876343106e-06, |
|
"loss": 1.5228, |
|
"step": 26567 |
|
}, |
|
{ |
|
"epoch": 0.6732946455128246, |
|
"grad_norm": 1.3785438537597656, |
|
"learning_rate": 2.220735601173002e-06, |
|
"loss": 1.516, |
|
"step": 26598 |
|
}, |
|
{ |
|
"epoch": 0.6740793712068954, |
|
"grad_norm": 1.3642317056655884, |
|
"learning_rate": 2.186260975614382e-06, |
|
"loss": 1.5467, |
|
"step": 26629 |
|
}, |
|
{ |
|
"epoch": 0.6748640969009664, |
|
"grad_norm": 1.3815925121307373, |
|
"learning_rate": 2.1520437999999034e-06, |
|
"loss": 1.5449, |
|
"step": 26660 |
|
}, |
|
{ |
|
"epoch": 0.6756488225950372, |
|
"grad_norm": 1.3854280710220337, |
|
"learning_rate": 2.1180844604657526e-06, |
|
"loss": 1.5177, |
|
"step": 26691 |
|
}, |
|
{ |
|
"epoch": 0.6764335482891082, |
|
"grad_norm": 1.4565620422363281, |
|
"learning_rate": 2.084383340238455e-06, |
|
"loss": 1.5119, |
|
"step": 26722 |
|
}, |
|
{ |
|
"epoch": 0.677218273983179, |
|
"grad_norm": 1.35818612575531, |
|
"learning_rate": 2.0509408196305704e-06, |
|
"loss": 1.5084, |
|
"step": 26753 |
|
}, |
|
{ |
|
"epoch": 0.6780029996772499, |
|
"grad_norm": 1.4125559329986572, |
|
"learning_rate": 2.017757276036403e-06, |
|
"loss": 1.5101, |
|
"step": 26784 |
|
}, |
|
{ |
|
"epoch": 0.6787877253713208, |
|
"grad_norm": 1.43025803565979, |
|
"learning_rate": 1.984833083927726e-06, |
|
"loss": 1.5318, |
|
"step": 26815 |
|
}, |
|
{ |
|
"epoch": 0.6795724510653917, |
|
"grad_norm": 1.3963549137115479, |
|
"learning_rate": 1.952168614849581e-06, |
|
"loss": 1.5248, |
|
"step": 26846 |
|
}, |
|
{ |
|
"epoch": 0.6803571767594626, |
|
"grad_norm": 1.4896256923675537, |
|
"learning_rate": 1.919764237416058e-06, |
|
"loss": 1.5409, |
|
"step": 26877 |
|
}, |
|
{ |
|
"epoch": 0.6811419024535335, |
|
"grad_norm": 1.3385494947433472, |
|
"learning_rate": 1.8876203173061463e-06, |
|
"loss": 1.5371, |
|
"step": 26908 |
|
}, |
|
{ |
|
"epoch": 0.6819266281476044, |
|
"grad_norm": 1.3572068214416504, |
|
"learning_rate": 1.8557372172596206e-06, |
|
"loss": 1.5394, |
|
"step": 26939 |
|
}, |
|
{ |
|
"epoch": 0.6827113538416753, |
|
"grad_norm": 1.455278992652893, |
|
"learning_rate": 1.8241152970729341e-06, |
|
"loss": 1.5345, |
|
"step": 26970 |
|
}, |
|
{ |
|
"epoch": 0.6834960795357462, |
|
"grad_norm": 1.4417409896850586, |
|
"learning_rate": 1.7927549135951572e-06, |
|
"loss": 1.5252, |
|
"step": 27001 |
|
}, |
|
{ |
|
"epoch": 0.684280805229817, |
|
"grad_norm": 1.4233667850494385, |
|
"learning_rate": 1.7616564207239477e-06, |
|
"loss": 1.5221, |
|
"step": 27032 |
|
}, |
|
{ |
|
"epoch": 0.6850655309238879, |
|
"grad_norm": 1.4328643083572388, |
|
"learning_rate": 1.730820169401584e-06, |
|
"loss": 1.508, |
|
"step": 27063 |
|
}, |
|
{ |
|
"epoch": 0.6858502566179588, |
|
"grad_norm": 1.3445032835006714, |
|
"learning_rate": 1.7002465076109558e-06, |
|
"loss": 1.5209, |
|
"step": 27094 |
|
}, |
|
{ |
|
"epoch": 0.6866349823120297, |
|
"grad_norm": 1.4214242696762085, |
|
"learning_rate": 1.6699357803716898e-06, |
|
"loss": 1.5297, |
|
"step": 27125 |
|
}, |
|
{ |
|
"epoch": 0.6874197080061006, |
|
"grad_norm": 1.3590694665908813, |
|
"learning_rate": 1.6398883297362305e-06, |
|
"loss": 1.5351, |
|
"step": 27156 |
|
}, |
|
{ |
|
"epoch": 0.6882044337001715, |
|
"grad_norm": 1.4039976596832275, |
|
"learning_rate": 1.6101044947859606e-06, |
|
"loss": 1.5529, |
|
"step": 27187 |
|
}, |
|
{ |
|
"epoch": 0.6889891593942424, |
|
"grad_norm": 1.3939241170883179, |
|
"learning_rate": 1.5805846116274114e-06, |
|
"loss": 1.509, |
|
"step": 27218 |
|
}, |
|
{ |
|
"epoch": 0.6897738850883133, |
|
"grad_norm": 1.4963489770889282, |
|
"learning_rate": 1.5513290133884611e-06, |
|
"loss": 1.5526, |
|
"step": 27249 |
|
}, |
|
{ |
|
"epoch": 0.6905586107823841, |
|
"grad_norm": 1.413089632987976, |
|
"learning_rate": 1.5223380302145512e-06, |
|
"loss": 1.5271, |
|
"step": 27280 |
|
}, |
|
{ |
|
"epoch": 0.6913433364764551, |
|
"grad_norm": 1.4136161804199219, |
|
"learning_rate": 1.4936119892649925e-06, |
|
"loss": 1.5365, |
|
"step": 27311 |
|
}, |
|
{ |
|
"epoch": 0.6921280621705259, |
|
"grad_norm": 1.4144634008407593, |
|
"learning_rate": 1.4651512147092482e-06, |
|
"loss": 1.5255, |
|
"step": 27342 |
|
}, |
|
{ |
|
"epoch": 0.6929127878645969, |
|
"grad_norm": 1.3424650430679321, |
|
"learning_rate": 1.4369560277232908e-06, |
|
"loss": 1.5275, |
|
"step": 27373 |
|
}, |
|
{ |
|
"epoch": 0.6936975135586677, |
|
"grad_norm": 1.4057984352111816, |
|
"learning_rate": 1.409026746485978e-06, |
|
"loss": 1.5273, |
|
"step": 27404 |
|
}, |
|
{ |
|
"epoch": 0.6944822392527387, |
|
"grad_norm": 1.4132764339447021, |
|
"learning_rate": 1.3813636861754464e-06, |
|
"loss": 1.5219, |
|
"step": 27435 |
|
}, |
|
{ |
|
"epoch": 0.6952669649468095, |
|
"grad_norm": 1.541971206665039, |
|
"learning_rate": 1.3539671589655773e-06, |
|
"loss": 1.5413, |
|
"step": 27466 |
|
} |
|
], |
|
"logging_steps": 31, |
|
"max_steps": 30517, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 3052, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.037550548620044e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|
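The dump above is evidently a Hugging Face transformers Trainer state log: "log_history" records loss, learning rate, and gradient norm once every "logging_steps" (31) optimizer steps, and the trailing fields ("max_steps", "save_steps", "total_flos", and so on) describe the run configuration. A minimal sketch for inspecting such a dump follows; the "trainer_state.json" file name and the matplotlib dependency are assumptions for illustration, not something the log itself specifies.

# A minimal sketch, assuming the log above is saved as "trainer_state.json"
# (the default name the Trainer writes into each checkpoint directory) and
# that matplotlib is installed. It extracts per-step loss and learning rate
# from "log_history" and plots both curves.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only training entries; evaluation records (if present) carry
# "eval_loss" rather than "loss", so filter defensively on the key.
history = [e for e in state["log_history"] if "loss" in e]

steps = [e["step"] for e in history]
losses = [e["loss"] for e in history]
lrs = [e["learning_rate"] for e in history]

fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True, figsize=(8, 6))
ax_loss.plot(steps, losses)
ax_loss.set_ylabel("training loss")
ax_lr.plot(steps, lrs)
ax_lr.set_ylabel("learning rate")
ax_lr.set_xlabel("step")
fig.tight_layout()
plt.show()

On this log the plot would show the loss easing from roughly 1.57 toward 1.52 while the learning rate decays smoothly from about 1.29e-05 to 1.35e-06, consistent with a scheduled decay approaching the end of the single configured epoch (global_step 27468 of max_steps 30517).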