{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 4334,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00023073373327180433,
      "grad_norm": 0.686297595500946,
      "learning_rate": 4.6082949308755763e-07,
      "loss": 1.1177,
      "step": 1
    },
    {
      "epoch": 0.0011536686663590216,
      "grad_norm": 0.7558704614639282,
      "learning_rate": 2.3041474654377884e-06,
      "loss": 1.1971,
      "step": 5
    },
    {
      "epoch": 0.0023073373327180432,
      "grad_norm": 0.5853979587554932,
      "learning_rate": 4.608294930875577e-06,
      "loss": 1.1555,
      "step": 10
    },
    {
      "epoch": 0.003461005999077065,
      "grad_norm": 0.6028280258178711,
      "learning_rate": 6.912442396313365e-06,
      "loss": 1.1553,
      "step": 15
    },
    {
      "epoch": 0.0046146746654360865,
      "grad_norm": 0.5894124507904053,
      "learning_rate": 9.216589861751153e-06,
      "loss": 1.1362,
      "step": 20
    },
    {
      "epoch": 0.005768343331795108,
      "grad_norm": 0.5509130358695984,
      "learning_rate": 1.152073732718894e-05,
      "loss": 1.1086,
      "step": 25
    },
    {
      "epoch": 0.00692201199815413,
      "grad_norm": 0.4827423095703125,
      "learning_rate": 1.382488479262673e-05,
      "loss": 1.0714,
      "step": 30
    },
    {
      "epoch": 0.008075680664513151,
      "grad_norm": 0.4206579923629761,
      "learning_rate": 1.6129032258064517e-05,
      "loss": 1.0986,
      "step": 35
    },
    {
      "epoch": 0.009229349330872173,
      "grad_norm": 0.4018802344799042,
      "learning_rate": 1.8433179723502307e-05,
      "loss": 1.0826,
      "step": 40
    },
    {
      "epoch": 0.010383017997231195,
      "grad_norm": 0.3892878293991089,
      "learning_rate": 2.0737327188940094e-05,
      "loss": 1.0754,
      "step": 45
    },
    {
      "epoch": 0.011536686663590217,
      "grad_norm": 0.42607298493385315,
      "learning_rate": 2.304147465437788e-05,
      "loss": 1.0598,
      "step": 50
    },
    {
      "epoch": 0.012690355329949238,
      "grad_norm": 0.4032100439071655,
      "learning_rate": 2.534562211981567e-05,
      "loss": 1.0398,
      "step": 55
    },
    {
      "epoch": 0.01384402399630826,
      "grad_norm": 0.300424724817276,
      "learning_rate": 2.764976958525346e-05,
      "loss": 1.0314,
      "step": 60
    },
    {
      "epoch": 0.014997692662667282,
      "grad_norm": 0.3120896816253662,
      "learning_rate": 2.9953917050691244e-05,
      "loss": 1.0325,
      "step": 65
    },
    {
      "epoch": 0.016151361329026302,
      "grad_norm": 0.3133822977542877,
      "learning_rate": 3.2258064516129034e-05,
      "loss": 1.0001,
      "step": 70
    },
    {
      "epoch": 0.017305029995385326,
      "grad_norm": 0.3224703371524811,
      "learning_rate": 3.456221198156682e-05,
      "loss": 0.9706,
      "step": 75
    },
    {
      "epoch": 0.018458698661744346,
      "grad_norm": 0.3161858022212982,
      "learning_rate": 3.6866359447004614e-05,
      "loss": 1.0638,
      "step": 80
    },
    {
      "epoch": 0.01961236732810337,
      "grad_norm": 0.3286690413951874,
      "learning_rate": 3.91705069124424e-05,
      "loss": 0.996,
      "step": 85
    },
    {
      "epoch": 0.02076603599446239,
      "grad_norm": 0.33011969923973083,
      "learning_rate": 4.147465437788019e-05,
      "loss": 0.9976,
      "step": 90
    },
    {
      "epoch": 0.021919704660821413,
      "grad_norm": 0.32752472162246704,
      "learning_rate": 4.3778801843317974e-05,
      "loss": 1.0096,
      "step": 95
    },
    {
      "epoch": 0.023073373327180433,
      "grad_norm": 0.36182036995887756,
      "learning_rate": 4.608294930875576e-05,
      "loss": 1.0075,
      "step": 100
    },
    {
      "epoch": 0.024227041993539457,
      "grad_norm": 0.3445587158203125,
      "learning_rate": 4.8387096774193554e-05,
      "loss": 1.002,
      "step": 105
    },
    {
      "epoch": 0.025380710659898477,
      "grad_norm": 0.3711521029472351,
      "learning_rate": 5.069124423963134e-05,
      "loss": 0.9899,
      "step": 110
    },
    {
      "epoch": 0.0265343793262575,
      "grad_norm": 0.3891366124153137,
      "learning_rate": 5.2995391705069134e-05,
      "loss": 1.0221,
      "step": 115
    },
    {
      "epoch": 0.02768804799261652,
      "grad_norm": 0.35788699984550476,
      "learning_rate": 5.529953917050692e-05,
      "loss": 0.9712,
      "step": 120
    },
    {
      "epoch": 0.02884171665897554,
      "grad_norm": 0.3537100553512573,
      "learning_rate": 5.76036866359447e-05,
      "loss": 0.9863,
      "step": 125
    },
    {
      "epoch": 0.029995385325334564,
      "grad_norm": 0.3532801568508148,
      "learning_rate": 5.990783410138249e-05,
      "loss": 0.9795,
      "step": 130
    },
    {
      "epoch": 0.031149053991693584,
      "grad_norm": 0.3615999221801758,
      "learning_rate": 6.221198156682027e-05,
      "loss": 1.0252,
      "step": 135
    },
    {
      "epoch": 0.032302722658052604,
      "grad_norm": 0.37444955110549927,
      "learning_rate": 6.451612903225807e-05,
      "loss": 0.9857,
      "step": 140
    },
    {
      "epoch": 0.03345639132441163,
      "grad_norm": 0.3502795696258545,
      "learning_rate": 6.682027649769586e-05,
      "loss": 0.9619,
      "step": 145
    },
    {
      "epoch": 0.03461005999077065,
      "grad_norm": 0.40353333950042725,
      "learning_rate": 6.912442396313364e-05,
      "loss": 0.9964,
      "step": 150
    },
    {
      "epoch": 0.03576372865712967,
      "grad_norm": 0.36437052488327026,
      "learning_rate": 7.142857142857143e-05,
      "loss": 1.0316,
      "step": 155
    },
    {
      "epoch": 0.03691739732348869,
      "grad_norm": 0.36226072907447815,
      "learning_rate": 7.373271889400923e-05,
      "loss": 0.973,
      "step": 160
    },
    {
      "epoch": 0.03807106598984772,
      "grad_norm": 0.33833077549934387,
      "learning_rate": 7.603686635944701e-05,
      "loss": 1.0132,
      "step": 165
    },
    {
      "epoch": 0.03922473465620674,
      "grad_norm": 0.3641466498374939,
      "learning_rate": 7.83410138248848e-05,
      "loss": 0.9902,
      "step": 170
    },
    {
      "epoch": 0.04037840332256576,
      "grad_norm": 0.3401365578174591,
      "learning_rate": 8.064516129032258e-05,
      "loss": 0.9581,
      "step": 175
    },
    {
      "epoch": 0.04153207198892478,
      "grad_norm": 0.4255409836769104,
      "learning_rate": 8.294930875576037e-05,
      "loss": 1.0141,
      "step": 180
    },
    {
      "epoch": 0.0426857406552838,
      "grad_norm": 0.34667283296585083,
      "learning_rate": 8.525345622119815e-05,
      "loss": 0.9797,
      "step": 185
    },
    {
      "epoch": 0.043839409321642826,
      "grad_norm": 0.34790462255477905,
      "learning_rate": 8.755760368663595e-05,
      "loss": 1.0209,
      "step": 190
    },
    {
      "epoch": 0.044993077988001846,
      "grad_norm": 0.32455047965049744,
      "learning_rate": 8.986175115207374e-05,
      "loss": 0.9583,
      "step": 195
    },
    {
      "epoch": 0.046146746654360866,
      "grad_norm": 0.31896835565567017,
      "learning_rate": 9.216589861751152e-05,
      "loss": 1.0452,
      "step": 200
    },
    {
      "epoch": 0.04730041532071989,
      "grad_norm": 0.35029253363609314,
      "learning_rate": 9.447004608294931e-05,
      "loss": 0.9895,
      "step": 205
    },
    {
      "epoch": 0.048454083987078914,
      "grad_norm": 0.3429763615131378,
      "learning_rate": 9.677419354838711e-05,
      "loss": 1.0308,
      "step": 210
    },
    {
      "epoch": 0.049607752653437934,
      "grad_norm": 0.30916911363601685,
      "learning_rate": 9.907834101382489e-05,
      "loss": 0.9656,
      "step": 215
    },
    {
      "epoch": 0.050761421319796954,
      "grad_norm": 0.3224053680896759,
      "learning_rate": 0.00010138248847926268,
      "loss": 0.9742,
      "step": 220
    },
    {
      "epoch": 0.051915089986155974,
      "grad_norm": 0.303576797246933,
      "learning_rate": 0.00010368663594470047,
      "loss": 1.0283,
      "step": 225
    },
    {
      "epoch": 0.053068758652515,
      "grad_norm": 0.3039628863334656,
      "learning_rate": 0.00010599078341013827,
      "loss": 0.9651,
      "step": 230
    },
    {
      "epoch": 0.05422242731887402,
      "grad_norm": 0.29959338903427124,
      "learning_rate": 0.00010829493087557605,
      "loss": 0.9649,
      "step": 235
    },
    {
      "epoch": 0.05537609598523304,
      "grad_norm": 0.31773313879966736,
      "learning_rate": 0.00011059907834101384,
      "loss": 1.0011,
      "step": 240
    },
    {
      "epoch": 0.05652976465159206,
      "grad_norm": 0.2978110909461975,
      "learning_rate": 0.00011290322580645163,
      "loss": 0.974,
      "step": 245
    },
    {
      "epoch": 0.05768343331795108,
      "grad_norm": 0.30528807640075684,
      "learning_rate": 0.0001152073732718894,
      "loss": 1.0014,
      "step": 250
    },
    {
      "epoch": 0.05883710198431011,
      "grad_norm": 0.2934586703777313,
      "learning_rate": 0.00011751152073732718,
      "loss": 1.012,
      "step": 255
    },
    {
      "epoch": 0.05999077065066913,
      "grad_norm": 0.305656373500824,
      "learning_rate": 0.00011981566820276497,
      "loss": 0.9923,
      "step": 260
    },
    {
      "epoch": 0.06114443931702815,
      "grad_norm": 0.2879476845264435,
      "learning_rate": 0.00012211981566820275,
      "loss": 0.9748,
      "step": 265
    },
    {
      "epoch": 0.06229810798338717,
      "grad_norm": 0.2932344377040863,
      "learning_rate": 0.00012442396313364055,
      "loss": 0.9904,
      "step": 270
    },
    {
      "epoch": 0.06345177664974619,
      "grad_norm": 0.28333616256713867,
      "learning_rate": 0.00012672811059907834,
      "loss": 0.9694,
      "step": 275
    },
    {
      "epoch": 0.06460544531610521,
      "grad_norm": 0.3106706738471985,
      "learning_rate": 0.00012903225806451613,
      "loss": 0.9602,
      "step": 280
    },
    {
      "epoch": 0.06575911398246424,
      "grad_norm": 0.30486252903938293,
      "learning_rate": 0.00013133640552995393,
      "loss": 0.997,
      "step": 285
    },
    {
      "epoch": 0.06691278264882326,
      "grad_norm": 0.2930590808391571,
      "learning_rate": 0.00013364055299539172,
      "loss": 0.9839,
      "step": 290
    },
    {
      "epoch": 0.06806645131518228,
      "grad_norm": 0.3167310655117035,
      "learning_rate": 0.0001359447004608295,
      "loss": 0.9784,
      "step": 295
    },
    {
      "epoch": 0.0692201199815413,
      "grad_norm": 0.28749707341194153,
      "learning_rate": 0.00013824884792626728,
      "loss": 0.996,
      "step": 300
    },
    {
      "epoch": 0.07037378864790032,
      "grad_norm": 0.33814895153045654,
      "learning_rate": 0.00014055299539170507,
      "loss": 1.013,
      "step": 305
    },
    {
      "epoch": 0.07152745731425934,
      "grad_norm": 0.28523507714271545,
      "learning_rate": 0.00014285714285714287,
      "loss": 0.9904,
      "step": 310
    },
    {
      "epoch": 0.07268112598061836,
      "grad_norm": 0.27578404545783997,
      "learning_rate": 0.00014516129032258066,
      "loss": 1.0064,
      "step": 315
    },
    {
      "epoch": 0.07383479464697738,
      "grad_norm": 0.2809511721134186,
      "learning_rate": 0.00014746543778801845,
      "loss": 0.9846,
      "step": 320
    },
    {
      "epoch": 0.0749884633133364,
      "grad_norm": 0.28782716393470764,
      "learning_rate": 0.00014976958525345625,
      "loss": 0.9869,
      "step": 325
    },
    {
      "epoch": 0.07614213197969544,
      "grad_norm": 0.2717392146587372,
      "learning_rate": 0.00015207373271889401,
      "loss": 0.962,
      "step": 330
    },
    {
      "epoch": 0.07729580064605446,
      "grad_norm": 0.27220791578292847,
      "learning_rate": 0.0001543778801843318,
      "loss": 0.9813,
      "step": 335
    },
    {
      "epoch": 0.07844946931241348,
      "grad_norm": 0.3024996817111969,
      "learning_rate": 0.0001566820276497696,
      "loss": 0.9887,
      "step": 340
    },
    {
      "epoch": 0.0796031379787725,
      "grad_norm": 0.27522554993629456,
      "learning_rate": 0.0001589861751152074,
      "loss": 0.9869,
      "step": 345
    },
    {
      "epoch": 0.08075680664513152,
      "grad_norm": 0.27453291416168213,
      "learning_rate": 0.00016129032258064516,
      "loss": 0.9365,
      "step": 350
    },
    {
      "epoch": 0.08191047531149054,
      "grad_norm": 0.27664312720298767,
      "learning_rate": 0.00016359447004608295,
      "loss": 0.9611,
      "step": 355
    },
    {
      "epoch": 0.08306414397784956,
      "grad_norm": 0.2761642038822174,
      "learning_rate": 0.00016589861751152075,
      "loss": 0.9935,
      "step": 360
    },
    {
      "epoch": 0.08421781264420858,
      "grad_norm": 0.2900699973106384,
      "learning_rate": 0.00016820276497695851,
      "loss": 0.9528,
      "step": 365
    },
    {
      "epoch": 0.0853714813105676,
      "grad_norm": 0.2699083387851715,
      "learning_rate": 0.0001705069124423963,
      "loss": 0.9612,
      "step": 370
    },
    {
      "epoch": 0.08652514997692663,
      "grad_norm": 0.26819881796836853,
      "learning_rate": 0.0001728110599078341,
      "loss": 0.9652,
      "step": 375
    },
    {
      "epoch": 0.08767881864328565,
      "grad_norm": 0.2670578360557556,
      "learning_rate": 0.0001751152073732719,
      "loss": 0.944,
      "step": 380
    },
    {
      "epoch": 0.08883248730964467,
      "grad_norm": 0.27193009853363037,
      "learning_rate": 0.0001774193548387097,
      "loss": 1.0175,
      "step": 385
    },
    {
      "epoch": 0.08998615597600369,
      "grad_norm": 0.27275770902633667,
      "learning_rate": 0.00017972350230414748,
      "loss": 1.008,
      "step": 390
    },
    {
      "epoch": 0.09113982464236271,
      "grad_norm": 0.2615206837654114,
      "learning_rate": 0.00018202764976958527,
      "loss": 1.0064,
      "step": 395
    },
    {
      "epoch": 0.09229349330872173,
      "grad_norm": 0.2606607973575592,
      "learning_rate": 0.00018433179723502304,
      "loss": 0.9791,
      "step": 400
    },
    {
      "epoch": 0.09344716197508075,
      "grad_norm": 0.257321834564209,
      "learning_rate": 0.00018663594470046083,
      "loss": 0.9628,
      "step": 405
    },
    {
      "epoch": 0.09460083064143977,
      "grad_norm": 0.26475539803504944,
      "learning_rate": 0.00018894009216589863,
      "loss": 0.9771,
      "step": 410
    },
    {
      "epoch": 0.0957544993077988,
      "grad_norm": 0.2725866138935089,
      "learning_rate": 0.00019124423963133642,
      "loss": 0.9667,
      "step": 415
    },
    {
      "epoch": 0.09690816797415783,
      "grad_norm": 0.25924256443977356,
      "learning_rate": 0.00019354838709677422,
      "loss": 0.9865,
      "step": 420
    },
    {
      "epoch": 0.09806183664051685,
      "grad_norm": 0.27873703837394714,
      "learning_rate": 0.000195852534562212,
      "loss": 0.9485,
      "step": 425
    },
    {
      "epoch": 0.09921550530687587,
      "grad_norm": 0.26027387380599976,
      "learning_rate": 0.00019815668202764977,
      "loss": 1.0007,
      "step": 430
    },
    {
      "epoch": 0.10036917397323489,
      "grad_norm": 0.2628461718559265,
      "learning_rate": 0.00019999996755554284,
      "loss": 0.971,
      "step": 435
    },
    {
      "epoch": 0.10152284263959391,
      "grad_norm": 0.2701122462749481,
      "learning_rate": 0.00019999883200175287,
      "loss": 0.9721,
      "step": 440
    },
    {
      "epoch": 0.10267651130595293,
      "grad_norm": 0.28857171535491943,
      "learning_rate": 0.0001999960742461578,
      "loss": 0.9881,
      "step": 445
    },
    {
      "epoch": 0.10383017997231195,
      "grad_norm": 0.2599029242992401,
      "learning_rate": 0.0001999916943334945,
      "loss": 0.9627,
      "step": 450
    },
    {
      "epoch": 0.10498384863867097,
      "grad_norm": 0.24774646759033203,
      "learning_rate": 0.0001999856923348149,
      "loss": 0.9885,
      "step": 455
    },
    {
      "epoch": 0.10613751730503,
      "grad_norm": 0.26404890418052673,
      "learning_rate": 0.00019997806834748456,
      "loss": 0.9791,
      "step": 460
    },
    {
      "epoch": 0.10729118597138902,
      "grad_norm": 0.25796157121658325,
      "learning_rate": 0.00019996882249518144,
      "loss": 0.9818,
      "step": 465
    },
    {
      "epoch": 0.10844485463774804,
      "grad_norm": 0.2635892927646637,
      "learning_rate": 0.0001999579549278937,
      "loss": 0.9565,
      "step": 470
    },
    {
      "epoch": 0.10959852330410706,
      "grad_norm": 0.2549423575401306,
      "learning_rate": 0.00019994546582191718,
      "loss": 0.9793,
      "step": 475
    },
    {
      "epoch": 0.11075219197046608,
      "grad_norm": 0.24907591938972473,
      "learning_rate": 0.00019993135537985283,
      "loss": 0.9513,
      "step": 480
    },
    {
      "epoch": 0.1119058606368251,
      "grad_norm": 0.2583698332309723,
      "learning_rate": 0.00019991562383060317,
      "loss": 0.9672,
      "step": 485
    },
    {
      "epoch": 0.11305952930318412,
      "grad_norm": 0.2518406808376312,
      "learning_rate": 0.00019989827142936862,
      "loss": 0.984,
      "step": 490
    },
    {
      "epoch": 0.11421319796954314,
      "grad_norm": 0.27680379152297974,
      "learning_rate": 0.00019987929845764345,
      "loss": 0.9874,
      "step": 495
    },
    {
      "epoch": 0.11536686663590216,
      "grad_norm": 0.35969656705856323,
      "learning_rate": 0.00019985870522321118,
      "loss": 1.0057,
      "step": 500
    },
    {
      "epoch": 0.1165205353022612,
      "grad_norm": 0.2667674720287323,
      "learning_rate": 0.00019983649206013944,
      "loss": 1.0074,
      "step": 505
    },
    {
      "epoch": 0.11767420396862022,
      "grad_norm": 0.265827476978302,
      "learning_rate": 0.00019981265932877488,
      "loss": 0.9374,
      "step": 510
    },
    {
      "epoch": 0.11882787263497924,
      "grad_norm": 0.25846490263938904,
      "learning_rate": 0.00019978720741573692,
      "loss": 0.9766,
      "step": 515
    },
    {
      "epoch": 0.11998154130133826,
      "grad_norm": 0.24783585965633392,
      "learning_rate": 0.00019976013673391182,
      "loss": 0.9783,
      "step": 520
    },
    {
      "epoch": 0.12113520996769728,
      "grad_norm": 0.25355273485183716,
      "learning_rate": 0.00019973144772244582,
      "loss": 0.9786,
      "step": 525
    },
    {
      "epoch": 0.1222888786340563,
      "grad_norm": 0.2570498585700989,
      "learning_rate": 0.00019970114084673796,
      "loss": 0.9982,
      "step": 530
    },
    {
      "epoch": 0.12344254730041532,
      "grad_norm": 0.25161927938461304,
      "learning_rate": 0.00019966921659843274,
      "loss": 0.9451,
      "step": 535
    },
    {
      "epoch": 0.12459621596677434,
      "grad_norm": 0.2521721422672272,
      "learning_rate": 0.0001996356754954119,
      "loss": 0.9864,
      "step": 540
    },
    {
      "epoch": 0.12574988463313336,
      "grad_norm": 0.2616737186908722,
      "learning_rate": 0.00019960051808178616,
      "loss": 0.9795,
      "step": 545
    },
    {
      "epoch": 0.12690355329949238,
      "grad_norm": 0.25555992126464844,
      "learning_rate": 0.0001995637449278864,
      "loss": 0.9879,
      "step": 550
    },
    {
      "epoch": 0.1280572219658514,
      "grad_norm": 0.25149908661842346,
      "learning_rate": 0.0001995253566302543,
      "loss": 0.952,
      "step": 555
    },
    {
      "epoch": 0.12921089063221042,
      "grad_norm": 0.24779251217842102,
      "learning_rate": 0.00019948535381163288,
      "loss": 0.9663,
      "step": 560
    },
    {
      "epoch": 0.13036455929856944,
      "grad_norm": 0.28940436244010925,
      "learning_rate": 0.00019944373712095615,
      "loss": 0.9649,
      "step": 565
    },
    {
      "epoch": 0.13151822796492849,
      "grad_norm": 0.2656140923500061,
      "learning_rate": 0.00019940050723333866,
      "loss": 0.9729,
      "step": 570
    },
    {
      "epoch": 0.1326718966312875,
      "grad_norm": 0.26921287178993225,
      "learning_rate": 0.00019935566485006464,
      "loss": 0.9812,
      "step": 575
    },
    {
      "epoch": 0.13382556529764653,
      "grad_norm": 0.2550528943538666,
      "learning_rate": 0.0001993092106985765,
      "loss": 0.9711,
      "step": 580
    },
    {
      "epoch": 0.13497923396400555,
      "grad_norm": 0.26933753490448,
      "learning_rate": 0.0001992611455324632,
      "loss": 0.9782,
      "step": 585
    },
    {
      "epoch": 0.13613290263036457,
      "grad_norm": 0.25276893377304077,
      "learning_rate": 0.0001992114701314478,
      "loss": 0.9438,
      "step": 590
    },
    {
      "epoch": 0.1372865712967236,
      "grad_norm": 0.2518392503261566,
      "learning_rate": 0.00019916018530137495,
      "loss": 0.9835,
      "step": 595
    },
    {
      "epoch": 0.1384402399630826,
      "grad_norm": 0.2546631693840027,
      "learning_rate": 0.00019910729187419781,
      "loss": 1.0105,
      "step": 600
    },
    {
      "epoch": 0.13959390862944163,
      "grad_norm": 0.26017695665359497,
      "learning_rate": 0.00019905279070796454,
      "loss": 0.9554,
      "step": 605
    },
    {
      "epoch": 0.14074757729580065,
      "grad_norm": 0.2573865056037903,
      "learning_rate": 0.0001989966826868044,
      "loss": 0.9939,
      "step": 610
    },
    {
      "epoch": 0.14190124596215967,
      "grad_norm": 0.25712475180625916,
      "learning_rate": 0.00019893896872091329,
      "loss": 0.9359,
      "step": 615
    },
    {
      "epoch": 0.1430549146285187,
      "grad_norm": 0.2623005509376526,
      "learning_rate": 0.00019887964974653918,
      "loss": 0.9486,
      "step": 620
    },
    {
      "epoch": 0.1442085832948777,
      "grad_norm": 0.2532264292240143,
      "learning_rate": 0.00019881872672596683,
      "loss": 0.9803,
      "step": 625
    },
    {
      "epoch": 0.14536225196123673,
      "grad_norm": 0.2683295011520386,
      "learning_rate": 0.00019875620064750202,
      "loss": 0.9611,
      "step": 630
    },
    {
      "epoch": 0.14651592062759575,
      "grad_norm": 0.26294204592704773,
      "learning_rate": 0.00019869207252545584,
      "loss": 0.9749,
      "step": 635
    },
    {
      "epoch": 0.14766958929395477,
      "grad_norm": 0.24312348663806915,
      "learning_rate": 0.00019862634340012795,
      "loss": 0.9444,
      "step": 640
    },
    {
      "epoch": 0.1488232579603138,
      "grad_norm": 0.25715410709381104,
      "learning_rate": 0.00019855901433778991,
      "loss": 0.9491,
      "step": 645
    },
    {
      "epoch": 0.1499769266266728,
      "grad_norm": 0.2563926875591278,
      "learning_rate": 0.00019849008643066772,
      "loss": 0.9484,
      "step": 650
    },
    {
      "epoch": 0.15113059529303183,
      "grad_norm": 0.2505110800266266,
      "learning_rate": 0.0001984195607969242,
      "loss": 0.9911,
      "step": 655
    },
    {
      "epoch": 0.15228426395939088,
      "grad_norm": 0.2632611095905304,
      "learning_rate": 0.0001983474385806408,
      "loss": 0.9644,
      "step": 660
    },
    {
      "epoch": 0.1534379326257499,
      "grad_norm": 0.25112318992614746,
      "learning_rate": 0.00019827372095179908,
      "loss": 0.9766,
      "step": 665
    },
    {
      "epoch": 0.15459160129210892,
      "grad_norm": 0.2595708668231964,
      "learning_rate": 0.00019819840910626174,
      "loss": 0.9931,
      "step": 670
    },
    {
      "epoch": 0.15574526995846794,
      "grad_norm": 0.2515600621700287,
      "learning_rate": 0.00019812150426575315,
      "loss": 0.9581,
      "step": 675
    },
    {
      "epoch": 0.15689893862482696,
      "grad_norm": 0.2485395222902298,
      "learning_rate": 0.00019804300767783958,
      "loss": 0.9339,
      "step": 680
    },
    {
      "epoch": 0.15805260729118598,
      "grad_norm": 0.25041699409484863,
      "learning_rate": 0.00019796292061590899,
      "loss": 0.9478,
      "step": 685
    },
    {
      "epoch": 0.159206275957545,
      "grad_norm": 0.26130861043930054,
      "learning_rate": 0.0001978812443791503,
      "loss": 0.975,
      "step": 690
    },
    {
      "epoch": 0.16035994462390402,
      "grad_norm": 0.2546372711658478,
      "learning_rate": 0.00019779798029253241,
      "loss": 0.9673,
      "step": 695
    },
    {
      "epoch": 0.16151361329026304,
      "grad_norm": 0.2580021023750305,
      "learning_rate": 0.00019771312970678258,
      "loss": 0.9921,
      "step": 700
    },
    {
      "epoch": 0.16266728195662206,
      "grad_norm": 0.24550603330135345,
      "learning_rate": 0.00019762669399836462,
      "loss": 0.9568,
      "step": 705
    },
    {
      "epoch": 0.16382095062298108,
      "grad_norm": 0.25711777806282043,
      "learning_rate": 0.0001975386745694565,
      "loss": 0.9671,
      "step": 710
    },
    {
      "epoch": 0.1649746192893401,
      "grad_norm": 0.2552441656589508,
      "learning_rate": 0.00019744907284792776,
      "loss": 0.9796,
      "step": 715
    },
    {
      "epoch": 0.16612828795569912,
      "grad_norm": 0.24158374965190887,
      "learning_rate": 0.00019735789028731604,
      "loss": 0.9841,
      "step": 720
    },
    {
      "epoch": 0.16728195662205814,
      "grad_norm": 0.25179579854011536,
      "learning_rate": 0.00019726512836680378,
      "loss": 0.9817,
      "step": 725
    },
    {
      "epoch": 0.16843562528841716,
      "grad_norm": 0.27372288703918457,
      "learning_rate": 0.0001971707885911941,
      "loss": 0.9654,
      "step": 730
    },
    {
      "epoch": 0.16958929395477618,
      "grad_norm": 0.28059613704681396,
      "learning_rate": 0.00019707487249088641,
      "loss": 0.9669,
      "step": 735
    },
    {
      "epoch": 0.1707429626211352,
      "grad_norm": 0.27208980917930603,
      "learning_rate": 0.00019697738162185161,
      "loss": 1.0043,
      "step": 740
    },
    {
      "epoch": 0.17189663128749424,
      "grad_norm": 0.25409960746765137,
      "learning_rate": 0.0001968783175656068,
      "loss": 0.9751,
      "step": 745
    },
    {
      "epoch": 0.17305029995385326,
      "grad_norm": 0.2589627504348755,
      "learning_rate": 0.00019677768192918971,
      "loss": 0.9733,
      "step": 750
    },
    {
      "epoch": 0.17420396862021229,
      "grad_norm": 0.2695959210395813,
      "learning_rate": 0.00019667547634513247,
      "loss": 0.955,
      "step": 755
    },
    {
      "epoch": 0.1753576372865713,
      "grad_norm": 0.25583890080451965,
      "learning_rate": 0.00019657170247143525,
      "loss": 0.955,
      "step": 760
    },
    {
      "epoch": 0.17651130595293033,
      "grad_norm": 0.24999220669269562,
      "learning_rate": 0.0001964663619915394,
      "loss": 0.9715,
      "step": 765
    },
    {
      "epoch": 0.17766497461928935,
      "grad_norm": 0.2531087100505829,
      "learning_rate": 0.00019635945661430006,
      "loss": 0.9917,
      "step": 770
    },
    {
      "epoch": 0.17881864328564837,
      "grad_norm": 0.2580994665622711,
      "learning_rate": 0.0001962509880739584,
      "loss": 0.9876,
      "step": 775
    },
    {
      "epoch": 0.17997231195200739,
      "grad_norm": 0.2500920593738556,
      "learning_rate": 0.00019614095813011364,
      "loss": 0.9964,
      "step": 780
    },
    {
      "epoch": 0.1811259806183664,
      "grad_norm": 0.2500130534172058,
      "learning_rate": 0.0001960293685676943,
      "loss": 0.9536,
      "step": 785
    },
    {
      "epoch": 0.18227964928472543,
      "grad_norm": 0.25024518370628357,
      "learning_rate": 0.0001959162211969295,
      "loss": 0.961,
      "step": 790
    },
    {
      "epoch": 0.18343331795108445,
      "grad_norm": 0.25615155696868896,
      "learning_rate": 0.00019580151785331934,
      "loss": 0.9795,
      "step": 795
    },
    {
      "epoch": 0.18458698661744347,
      "grad_norm": 0.2746971547603607,
      "learning_rate": 0.0001956852603976052,
      "loss": 0.99,
      "step": 800
    },
    {
      "epoch": 0.18574065528380249,
      "grad_norm": 0.2582091987133026,
      "learning_rate": 0.00019556745071573975,
      "loss": 0.9599,
      "step": 805
    },
    {
      "epoch": 0.1868943239501615,
      "grad_norm": 0.25607603788375854,
      "learning_rate": 0.00019544809071885604,
      "loss": 0.9739,
      "step": 810
    },
    {
      "epoch": 0.18804799261652053,
      "grad_norm": 0.25475266575813293,
      "learning_rate": 0.00019532718234323672,
      "loss": 0.9638,
      "step": 815
    },
    {
      "epoch": 0.18920166128287955,
      "grad_norm": 0.2636029124259949,
      "learning_rate": 0.00019520472755028256,
      "loss": 1.0138,
      "step": 820
    },
    {
      "epoch": 0.19035532994923857,
      "grad_norm": 0.25477829575538635,
      "learning_rate": 0.00019508072832648062,
      "loss": 0.928,
      "step": 825
    },
    {
      "epoch": 0.1915089986155976,
      "grad_norm": 0.2600194215774536,
      "learning_rate": 0.00019495518668337201,
      "loss": 0.98,
      "step": 830
    },
    {
      "epoch": 0.19266266728195663,
      "grad_norm": 0.2581029534339905,
      "learning_rate": 0.00019482810465751938,
      "loss": 0.9466,
      "step": 835
    },
    {
      "epoch": 0.19381633594831565,
      "grad_norm": 0.26259845495224,
      "learning_rate": 0.0001946994843104737,
      "loss": 0.9702,
      "step": 840
    },
    {
      "epoch": 0.19497000461467467,
      "grad_norm": 0.2556231617927551,
      "learning_rate": 0.00019456932772874091,
      "loss": 0.9752,
      "step": 845
    },
    {
      "epoch": 0.1961236732810337,
      "grad_norm": 0.25811153650283813,
      "learning_rate": 0.00019443763702374812,
      "loss": 0.9616,
      "step": 850
    },
    {
      "epoch": 0.19727734194739271,
      "grad_norm": 0.25802063941955566,
      "learning_rate": 0.00019430441433180925,
      "loss": 0.9736,
      "step": 855
    },
    {
      "epoch": 0.19843101061375173,
      "grad_norm": 0.27993249893188477,
      "learning_rate": 0.00019416966181409046,
      "loss": 0.9527,
      "step": 860
    },
    {
      "epoch": 0.19958467928011075,
      "grad_norm": 0.2626069188117981,
      "learning_rate": 0.00019403338165657508,
      "loss": 0.955,
      "step": 865
    },
    {
      "epoch": 0.20073834794646978,
      "grad_norm": 0.2545926570892334,
      "learning_rate": 0.00019389557607002805,
      "loss": 0.9896,
      "step": 870
    },
    {
      "epoch": 0.2018920166128288,
      "grad_norm": 0.2604697346687317,
      "learning_rate": 0.0001937562472899603,
      "loss": 0.9769,
      "step": 875
    },
    {
      "epoch": 0.20304568527918782,
      "grad_norm": 0.2628583312034607,
      "learning_rate": 0.0001936153975765921,
      "loss": 0.9392,
      "step": 880
    },
    {
      "epoch": 0.20419935394554684,
      "grad_norm": 0.2587975859642029,
      "learning_rate": 0.00019347302921481681,
      "loss": 0.9933,
      "step": 885
    },
    {
      "epoch": 0.20535302261190586,
      "grad_norm": 0.27935993671417236,
      "learning_rate": 0.00019332914451416347,
      "loss": 0.9232,
      "step": 890
    },
    {
      "epoch": 0.20650669127826488,
      "grad_norm": 0.2539782226085663,
      "learning_rate": 0.00019318374580875962,
      "loss": 0.9781,
      "step": 895
    },
    {
      "epoch": 0.2076603599446239,
      "grad_norm": 0.27198004722595215,
      "learning_rate": 0.00019303683545729322,
      "loss": 0.9484,
      "step": 900
    },
    {
      "epoch": 0.20881402861098292,
      "grad_norm": 0.25046050548553467,
      "learning_rate": 0.00019288841584297445,
      "loss": 0.9693,
      "step": 905
    },
    {
      "epoch": 0.20996769727734194,
      "grad_norm": 0.2635994851589203,
      "learning_rate": 0.0001927384893734971,
      "loss": 0.9412,
      "step": 910
    },
    {
      "epoch": 0.21112136594370096,
      "grad_norm": 0.26054415106773376,
      "learning_rate": 0.0001925870584809995,
      "loss": 0.9485,
      "step": 915
    },
    {
      "epoch": 0.21227503461006,
      "grad_norm": 0.25425177812576294,
      "learning_rate": 0.00019243412562202497,
      "loss": 0.9841,
      "step": 920
    },
    {
      "epoch": 0.21342870327641902,
      "grad_norm": 0.25780272483825684,
      "learning_rate": 0.00019227969327748211,
      "loss": 0.9502,
      "step": 925
    },
    {
      "epoch": 0.21458237194277804,
      "grad_norm": 0.24851177632808685,
      "learning_rate": 0.00019212376395260448,
      "loss": 0.9875,
      "step": 930
    },
    {
      "epoch": 0.21573604060913706,
      "grad_norm": 0.25959885120391846,
      "learning_rate": 0.00019196634017690993,
      "loss": 0.9734,
      "step": 935
    },
    {
      "epoch": 0.21688970927549608,
      "grad_norm": 0.2571864426136017,
      "learning_rate": 0.00019180742450415964,
      "loss": 0.9636,
      "step": 940
    },
    {
      "epoch": 0.2180433779418551,
      "grad_norm": 0.2478172332048416,
      "learning_rate": 0.00019164701951231657,
      "loss": 0.9718,
      "step": 945
    },
    {
      "epoch": 0.21919704660821412,
      "grad_norm": 0.27649134397506714,
      "learning_rate": 0.00019148512780350384,
      "loss": 0.9737,
      "step": 950
    },
    {
      "epoch": 0.22035071527457314,
      "grad_norm": 0.25620779395103455,
      "learning_rate": 0.00019132175200396235,
      "loss": 0.9431,
      "step": 955
    },
    {
      "epoch": 0.22150438394093216,
      "grad_norm": 0.2638430595397949,
      "learning_rate": 0.00019115689476400816,
      "loss": 0.984,
      "step": 960
    },
    {
      "epoch": 0.22265805260729118,
      "grad_norm": 0.25410696864128113,
      "learning_rate": 0.00019099055875798973,
      "loss": 0.9703,
      "step": 965
    },
    {
      "epoch": 0.2238117212736502,
      "grad_norm": 0.25862041115760803,
      "learning_rate": 0.00019082274668424422,
      "loss": 0.9509,
      "step": 970
    },
    {
      "epoch": 0.22496538994000922,
      "grad_norm": 0.24762850999832153,
      "learning_rate": 0.0001906534612650539,
      "loss": 0.9424,
      "step": 975
    },
    {
      "epoch": 0.22611905860636825,
      "grad_norm": 0.2532382309436798,
      "learning_rate": 0.00019048270524660196,
      "loss": 1.0109,
      "step": 980
    },
    {
      "epoch": 0.22727272727272727,
      "grad_norm": 0.296761155128479,
      "learning_rate": 0.000190310481398928,
      "loss": 0.9461,
      "step": 985
    },
    {
      "epoch": 0.22842639593908629,
      "grad_norm": 0.2572844624519348,
      "learning_rate": 0.00019013679251588303,
      "loss": 0.9685,
      "step": 990
    },
    {
      "epoch": 0.2295800646054453,
      "grad_norm": 0.2736752927303314,
      "learning_rate": 0.00018996164141508412,
      "loss": 0.9788,
      "step": 995
    },
    {
      "epoch": 0.23073373327180433,
      "grad_norm": 0.26040878891944885,
      "learning_rate": 0.00018978503093786882,
      "loss": 0.9669,
      "step": 1000
    },
    {
      "epoch": 0.23188740193816337,
      "grad_norm": 0.26469501852989197,
      "learning_rate": 0.00018960696394924886,
      "loss": 1.0047,
      "step": 1005
    },
    {
      "epoch": 0.2330410706045224,
      "grad_norm": 0.25627848505973816,
      "learning_rate": 0.00018942744333786397,
      "loss": 0.9911,
      "step": 1010
    },
    {
      "epoch": 0.2341947392708814,
      "grad_norm": 0.2675924003124237,
      "learning_rate": 0.00018924647201593473,
      "loss": 0.9934,
      "step": 1015
    },
    {
      "epoch": 0.23534840793724043,
      "grad_norm": 0.2544163763523102,
      "learning_rate": 0.00018906405291921547,
      "loss": 0.9674,
      "step": 1020
    },
    {
      "epoch": 0.23650207660359945,
      "grad_norm": 0.2576877772808075,
      "learning_rate": 0.00018888018900694667,
      "loss": 0.9345,
      "step": 1025
    },
    {
      "epoch": 0.23765574526995847,
      "grad_norm": 0.35056746006011963,
      "learning_rate": 0.00018869488326180679,
      "loss": 0.9842,
      "step": 1030
    },
    {
      "epoch": 0.2388094139363175,
      "grad_norm": 0.25830191373825073,
      "learning_rate": 0.00018850813868986416,
      "loss": 0.9229,
      "step": 1035
    },
    {
      "epoch": 0.23996308260267651,
      "grad_norm": 0.25119414925575256,
      "learning_rate": 0.000188319958320528,
      "loss": 0.9594,
      "step": 1040
    },
    {
      "epoch": 0.24111675126903553,
      "grad_norm": 0.2568560242652893,
      "learning_rate": 0.0001881303452064992,
      "loss": 0.9645,
      "step": 1045
    },
    {
      "epoch": 0.24227041993539455,
      "grad_norm": 0.2648126482963562,
      "learning_rate": 0.0001879393024237212,
      "loss": 0.9627,
      "step": 1050
    },
    {
      "epoch": 0.24342408860175357,
      "grad_norm": 0.2568890154361725,
      "learning_rate": 0.00018774683307132953,
      "loss": 0.9595,
      "step": 1055
    },
    {
      "epoch": 0.2445777572681126,
      "grad_norm": 0.26714202761650085,
      "learning_rate": 0.00018755294027160204,
      "loss": 0.9474,
      "step": 1060
    },
    {
      "epoch": 0.24573142593447161,
      "grad_norm": 0.2748320996761322,
      "learning_rate": 0.00018735762716990797,
      "loss": 0.9474,
      "step": 1065
    },
    {
      "epoch": 0.24688509460083063,
      "grad_norm": 0.28429654240608215,
      "learning_rate": 0.00018716089693465696,
      "loss": 0.99,
      "step": 1070
    },
    {
      "epoch": 0.24803876326718965,
      "grad_norm": 0.2595331072807312,
      "learning_rate": 0.0001869627527572477,
      "loss": 0.9592,
      "step": 1075
    },
    {
      "epoch": 0.24919243193354867,
      "grad_norm": 0.2595003843307495,
      "learning_rate": 0.00018676319785201616,
      "loss": 0.9882,
      "step": 1080
    },
    {
      "epoch": 0.2503461005999077,
      "grad_norm": 0.3043324649333954,
      "learning_rate": 0.00018656223545618345,
      "loss": 0.9573,
      "step": 1085
    },
    {
      "epoch": 0.2514997692662667,
      "grad_norm": 0.2606871724128723,
      "learning_rate": 0.00018635986882980325,
      "loss": 0.9808,
      "step": 1090
    },
    {
      "epoch": 0.25265343793262574,
      "grad_norm": 0.26122790575027466,
      "learning_rate": 0.00018615610125570902,
      "loss": 0.9523,
      "step": 1095
    },
    {
      "epoch": 0.25380710659898476,
      "grad_norm": 0.2673528492450714,
      "learning_rate": 0.00018595093603946053,
      "loss": 0.9495,
      "step": 1100
    },
    {
      "epoch": 0.2549607752653438,
      "grad_norm": 0.25935789942741394,
      "learning_rate": 0.00018574437650929054,
      "loss": 0.9026,
      "step": 1105
    },
    {
      "epoch": 0.2561144439317028,
      "grad_norm": 0.28227198123931885,
      "learning_rate": 0.00018553642601605068,
      "loss": 0.9456,
      "step": 1110
    },
    {
      "epoch": 0.2572681125980618,
      "grad_norm": 0.25894030928611755,
      "learning_rate": 0.00018532708793315693,
      "loss": 0.987,
      "step": 1115
    },
    {
      "epoch": 0.25842178126442084,
      "grad_norm": 0.2698533535003662,
      "learning_rate": 0.00018511636565653511,
      "loss": 0.9778,
      "step": 1120
    },
    {
      "epoch": 0.25957544993077986,
      "grad_norm": 0.25481945276260376,
      "learning_rate": 0.00018490426260456578,
      "loss": 0.978,
      "step": 1125
    },
    {
      "epoch": 0.2607291185971389,
      "grad_norm": 0.28665271401405334,
      "learning_rate": 0.0001846907822180286,
      "loss": 0.9764,
      "step": 1130
    },
    {
      "epoch": 0.2618827872634979,
      "grad_norm": 0.2624281942844391,
      "learning_rate": 0.00018447592796004675,
      "loss": 0.9534,
      "step": 1135
    },
    {
      "epoch": 0.26303645592985697,
      "grad_norm": 0.2616020143032074,
      "learning_rate": 0.00018425970331603056,
      "loss": 0.9692,
      "step": 1140
    },
    {
      "epoch": 0.264190124596216,
      "grad_norm": 0.2609774172306061,
      "learning_rate": 0.00018404211179362114,
      "loss": 0.9681,
      "step": 1145
    },
    {
      "epoch": 0.265343793262575,
      "grad_norm": 0.2684124708175659,
      "learning_rate": 0.00018382315692263323,
      "loss": 1.005,
      "step": 1150
    },
    {
      "epoch": 0.26649746192893403,
      "grad_norm": 0.2579489052295685,
      "learning_rate": 0.0001836028422549983,
      "loss": 1.0064,
      "step": 1155
    },
    {
      "epoch": 0.26765113059529305,
      "grad_norm": 0.26837727427482605,
      "learning_rate": 0.00018338117136470648,
      "loss": 0.9873,
      "step": 1160
    },
    {
      "epoch": 0.26880479926165207,
      "grad_norm": 0.27158886194229126,
      "learning_rate": 0.00018315814784774901,
      "loss": 0.9775,
      "step": 1165
    },
    {
      "epoch": 0.2699584679280111,
      "grad_norm": 0.28705140948295593,
      "learning_rate": 0.00018293377532205968,
      "loss": 0.9876,
      "step": 1170
    },
    {
      "epoch": 0.2711121365943701,
      "grad_norm": 0.2695893347263336,
      "learning_rate": 0.00018270805742745617,
      "loss": 0.9651,
      "step": 1175
    },
    {
      "epoch": 0.27226580526072913,
      "grad_norm": 0.2642047703266144,
      "learning_rate": 0.000182480997825581,
      "loss": 0.9849,
      "step": 1180
    },
    {
      "epoch": 0.27341947392708815,
      "grad_norm": 0.24997040629386902,
      "learning_rate": 0.00018225260019984215,
      "loss": 0.9574,
      "step": 1185
    },
    {
      "epoch": 0.2745731425934472,
      "grad_norm": 0.25589871406555176,
      "learning_rate": 0.0001820228682553533,
      "loss": 0.9311,
      "step": 1190
    },
    {
      "epoch": 0.2757268112598062,
      "grad_norm": 0.2490740865468979,
      "learning_rate": 0.00018179180571887372,
      "loss": 0.9471,
      "step": 1195
    },
    {
      "epoch": 0.2768804799261652,
      "grad_norm": 0.24929802119731903,
      "learning_rate": 0.00018155941633874787,
      "loss": 0.9551,
      "step": 1200
    },
    {
      "epoch": 0.27803414859252423,
      "grad_norm": 0.2558966875076294,
      "learning_rate": 0.00018132570388484443,
      "loss": 0.9283,
      "step": 1205
    },
    {
      "epoch": 0.27918781725888325,
      "grad_norm": 0.26152223348617554,
      "learning_rate": 0.00018109067214849538,
      "loss": 0.9457,
      "step": 1210
    },
    {
      "epoch": 0.2803414859252423,
      "grad_norm": 0.2626384198665619,
      "learning_rate": 0.0001808543249424343,
      "loss": 0.9678,
      "step": 1215
    },
    {
      "epoch": 0.2814951545916013,
      "grad_norm": 0.25507864356040955,
      "learning_rate": 0.00018061666610073464,
      "loss": 0.9639,
      "step": 1220
    },
    {
      "epoch": 0.2826488232579603,
      "grad_norm": 0.27339187264442444,
      "learning_rate": 0.00018037769947874742,
      "loss": 0.9475,
      "step": 1225
    },
    {
      "epoch": 0.28380249192431933,
      "grad_norm": 0.26297956705093384,
      "learning_rate": 0.00018013742895303883,
      "loss": 0.9415,
      "step": 1230
    },
    {
      "epoch": 0.28495616059067835,
      "grad_norm": 0.2605285942554474,
      "learning_rate": 0.00017989585842132712,
      "loss": 0.9507,
      "step": 1235
    },
    {
      "epoch": 0.2861098292570374,
      "grad_norm": 0.2680794596672058,
      "learning_rate": 0.00017965299180241963,
      "loss": 0.9928,
      "step": 1240
    },
    {
      "epoch": 0.2872634979233964,
      "grad_norm": 0.2609361410140991,
      "learning_rate": 0.00017940883303614905,
      "loss": 1.0034,
      "step": 1245
    },
    {
      "epoch": 0.2884171665897554,
      "grad_norm": 0.26120924949645996,
      "learning_rate": 0.0001791633860833096,
      "loss": 0.9703,
      "step": 1250
    },
    {
      "epoch": 0.28957083525611443,
      "grad_norm": 0.259295791387558,
      "learning_rate": 0.0001789166549255926,
      "loss": 0.9717,
      "step": 1255
    },
    {
      "epoch": 0.29072450392247345,
      "grad_norm": 0.25710657238960266,
      "learning_rate": 0.00017866864356552213,
      "loss": 0.9773,
      "step": 1260
    },
    {
      "epoch": 0.2918781725888325,
      "grad_norm": 0.25823676586151123,
      "learning_rate": 0.00017841935602638996,
      "loss": 0.9771,
      "step": 1265
    },
    {
      "epoch": 0.2930318412551915,
      "grad_norm": 0.2612430155277252,
      "learning_rate": 0.00017816879635219028,
      "loss": 0.9257,
      "step": 1270
    },
    {
      "epoch": 0.2941855099215505,
      "grad_norm": 0.2545841932296753,
      "learning_rate": 0.0001779169686075541,
      "loss": 0.931,
      "step": 1275
    },
    {
      "epoch": 0.29533917858790953,
      "grad_norm": 0.2565891146659851,
      "learning_rate": 0.0001776638768776834,
      "loss": 0.9442,
      "step": 1280
    },
    {
      "epoch": 0.29649284725426855,
      "grad_norm": 0.2589223384857178,
      "learning_rate": 0.00017740952526828466,
      "loss": 0.9319,
      "step": 1285
    },
    {
      "epoch": 0.2976465159206276,
      "grad_norm": 0.26724082231521606,
      "learning_rate": 0.00017715391790550252,
      "loss": 0.9854,
      "step": 1290
    },
    {
      "epoch": 0.2988001845869866,
      "grad_norm": 0.2645433247089386,
      "learning_rate": 0.00017689705893585272,
      "loss": 0.9683,
      "step": 1295
    },
    {
      "epoch": 0.2999538532533456,
      "grad_norm": 0.2574128210544586,
      "learning_rate": 0.0001766389525261547,
      "loss": 0.987,
      "step": 1300
    },
    {
      "epoch": 0.30110752191970463,
      "grad_norm": 0.2574234902858734,
      "learning_rate": 0.00017637960286346425,
      "loss": 0.9991,
      "step": 1305
    },
    {
      "epoch": 0.30226119058606365,
      "grad_norm": 0.255074143409729,
      "learning_rate": 0.00017611901415500535,
      "loss": 1.0042,
      "step": 1310
    },
    {
      "epoch": 0.30341485925242273,
      "grad_norm": 0.26597529649734497,
      "learning_rate": 0.00017585719062810214,
      "loss": 0.9865,
      "step": 1315
    },
    {
      "epoch": 0.30456852791878175,
      "grad_norm": 0.2626805901527405,
      "learning_rate": 0.00017559413653011024,
      "loss": 0.9751,
      "step": 1320
    },
    {
      "epoch": 0.30572219658514077,
      "grad_norm": 0.24981103837490082,
      "learning_rate": 0.0001753298561283478,
      "loss": 0.9628,
      "step": 1325
    },
    {
      "epoch": 0.3068758652514998,
      "grad_norm": 0.25727543234825134,
      "learning_rate": 0.00017506435371002633,
      "loss": 0.9427,
      "step": 1330
    },
    {
      "epoch": 0.3080295339178588,
      "grad_norm": 0.27170515060424805,
      "learning_rate": 0.00017479763358218119,
      "loss": 0.9777,
      "step": 1335
    },
    {
      "epoch": 0.30918320258421783,
      "grad_norm": 0.27030548453330994,
      "learning_rate": 0.0001745297000716016,
      "loss": 0.9678,
      "step": 1340
    },
    {
      "epoch": 0.31033687125057685,
      "grad_norm": 0.27044013142585754,
      "learning_rate": 0.0001742605575247606,
      "loss": 1.0005,
      "step": 1345
    },
    {
      "epoch": 0.31149053991693587,
      "grad_norm": 0.2567753195762634,
      "learning_rate": 0.00017399021030774442,
      "loss": 0.9379,
      "step": 1350
    },
    {
      "epoch": 0.3126442085832949,
      "grad_norm": 0.2584477961063385,
      "learning_rate": 0.00017371866280618177,
      "loss": 0.9599,
      "step": 1355
    },
    {
      "epoch": 0.3137978772496539,
      "grad_norm": 0.26751193404197693,
      "learning_rate": 0.0001734459194251725,
      "loss": 0.9778,
      "step": 1360
    },
    {
      "epoch": 0.31495154591601293,
      "grad_norm": 0.26162561774253845,
      "learning_rate": 0.00017317198458921638,
      "loss": 0.9809,
      "step": 1365
    },
    {
      "epoch": 0.31610521458237195,
      "grad_norm": 0.26419851183891296,
      "learning_rate": 0.00017289686274214118,
      "loss": 0.9779,
      "step": 1370
    },
    {
      "epoch": 0.31725888324873097,
      "grad_norm": 0.25940561294555664,
      "learning_rate": 0.00017262055834703054,
      "loss": 0.9899,
      "step": 1375
    },
    {
      "epoch": 0.31841255191509,
      "grad_norm": 0.2542431652545929,
      "learning_rate": 0.00017234307588615176,
      "loss": 0.9748,
      "step": 1380
    },
    {
      "epoch": 0.319566220581449,
      "grad_norm": 0.25366565585136414,
      "learning_rate": 0.0001720644198608829,
      "loss": 0.9828,
      "step": 1385
    },
    {
      "epoch": 0.32071988924780803,
      "grad_norm": 0.2604008913040161,
      "learning_rate": 0.00017178459479163976,
      "loss": 0.9697,
      "step": 1390
    },
    {
      "epoch": 0.32187355791416705,
      "grad_norm": 0.2703900933265686,
      "learning_rate": 0.0001715036052178028,
      "loss": 0.986,
      "step": 1395
    },
    {
      "epoch": 0.3230272265805261,
      "grad_norm": 0.25708577036857605,
      "learning_rate": 0.0001712214556976431,
      "loss": 0.9457,
      "step": 1400
    },
    {
      "epoch": 0.3241808952468851,
      "grad_norm": 0.26468509435653687,
      "learning_rate": 0.00017093815080824876,
      "loss": 0.9599,
      "step": 1405
    },
    {
      "epoch": 0.3253345639132441,
      "grad_norm": 0.2600449025630951,
      "learning_rate": 0.00017065369514545053,
      "loss": 0.941,
      "step": 1410
    },
    {
      "epoch": 0.32648823257960313,
      "grad_norm": 0.2549765110015869,
      "learning_rate": 0.00017036809332374712,
      "loss": 0.9735,
      "step": 1415
    },
    {
      "epoch": 0.32764190124596215,
      "grad_norm": 0.2710689902305603,
      "learning_rate": 0.00017008134997623065,
      "loss": 0.9949,
      "step": 1420
    },
    {
      "epoch": 0.3287955699123212,
      "grad_norm": 0.2554212808609009,
      "learning_rate": 0.00016979346975451112,
      "loss": 0.9741,
      "step": 1425
    },
    {
      "epoch": 0.3299492385786802,
      "grad_norm": 0.2632601261138916,
      "learning_rate": 0.00016950445732864127,
      "loss": 0.9951,
      "step": 1430
    },
    {
      "epoch": 0.3311029072450392,
      "grad_norm": 0.2675810754299164,
      "learning_rate": 0.0001692143173870407,
      "loss": 0.9544,
      "step": 1435
    },
    {
      "epoch": 0.33225657591139823,
      "grad_norm": 0.26224300265312195,
      "learning_rate": 0.00016892305463641965,
      "loss": 0.9297,
      "step": 1440
    },
    {
      "epoch": 0.33341024457775725,
      "grad_norm": 0.2826540470123291,
      "learning_rate": 0.00016863067380170298,
      "loss": 0.9588,
      "step": 1445
    },
    {
      "epoch": 0.3345639132441163,
      "grad_norm": 0.2714956998825073,
      "learning_rate": 0.00016833717962595326,
      "loss": 0.9682,
      "step": 1450
    },
    {
      "epoch": 0.3357175819104753,
      "grad_norm": 0.24968619644641876,
      "learning_rate": 0.00016804257687029389,
      "loss": 1.0191,
      "step": 1455
    },
    {
      "epoch": 0.3368712505768343,
      "grad_norm": 0.26332545280456543,
      "learning_rate": 0.00016774687031383188,
      "loss": 0.957,
      "step": 1460
    },
    {
      "epoch": 0.33802491924319333,
      "grad_norm": 0.25739216804504395,
      "learning_rate": 0.00016745006475358046,
      "loss": 0.9747,
      "step": 1465
    },
    {
      "epoch": 0.33917858790955235,
      "grad_norm": 0.26346343755722046,
      "learning_rate": 0.00016715216500438093,
      "loss": 0.9414,
      "step": 1470
    },
    {
      "epoch": 0.3403322565759114,
      "grad_norm": 0.25862592458724976,
      "learning_rate": 0.0001668531758988249,
      "loss": 0.962,
      "step": 1475
    },
    {
      "epoch": 0.3414859252422704,
      "grad_norm": 0.25549453496932983,
      "learning_rate": 0.00016655310228717564,
      "loss": 0.9575,
      "step": 1480
    },
    {
      "epoch": 0.3426395939086294,
      "grad_norm": 0.26341837644577026,
      "learning_rate": 0.0001662519490372896,
      "loss": 0.9546,
      "step": 1485
    },
    {
      "epoch": 0.3437932625749885,
      "grad_norm": 0.279466450214386,
      "learning_rate": 0.00016594972103453726,
      "loss": 0.9755,
      "step": 1490
    },
    {
      "epoch": 0.3449469312413475,
      "grad_norm": 0.2663206160068512,
      "learning_rate": 0.00016564642318172402,
      "loss": 0.9683,
      "step": 1495
    },
    {
      "epoch": 0.34610059990770653,
      "grad_norm": 0.2633247971534729,
      "learning_rate": 0.00016534206039901057,
      "loss": 0.9425,
      "step": 1500
    },
    {
      "epoch": 0.34725426857406555,
      "grad_norm": 0.26610881090164185,
      "learning_rate": 0.00016503663762383312,
      "loss": 0.9571,
      "step": 1505
    },
    {
      "epoch": 0.34840793724042457,
      "grad_norm": 0.25422990322113037,
      "learning_rate": 0.00016473015981082338,
      "loss": 0.965,
      "step": 1510
    },
    {
      "epoch": 0.3495616059067836,
      "grad_norm": 0.26195505261421204,
      "learning_rate": 0.000164422631931728,
      "loss": 0.9811,
      "step": 1515
    },
    {
      "epoch": 0.3507152745731426,
      "grad_norm": 0.2687501609325409,
      "learning_rate": 0.00016411405897532802,
      "loss": 0.9657,
      "step": 1520
    },
    {
      "epoch": 0.35186894323950163,
      "grad_norm": 0.27142685651779175,
      "learning_rate": 0.000163804445947358,
      "loss": 0.9656,
      "step": 1525
    },
    {
      "epoch": 0.35302261190586065,
      "grad_norm": 0.2657437026500702,
      "learning_rate": 0.00016349379787042477,
      "loss": 0.9561,
      "step": 1530
    },
    {
      "epoch": 0.35417628057221967,
      "grad_norm": 0.266258180141449,
      "learning_rate": 0.00016318211978392589,
      "loss": 0.9611,
      "step": 1535
    },
    {
      "epoch": 0.3553299492385787,
      "grad_norm": 0.2663586735725403,
      "learning_rate": 0.00016286941674396787,
      "loss": 0.9368,
      "step": 1540
    },
    {
      "epoch": 0.3564836179049377,
      "grad_norm": 0.2677685618400574,
      "learning_rate": 0.00016255569382328443,
      "loss": 0.9867,
      "step": 1545
    },
    {
      "epoch": 0.35763728657129673,
      "grad_norm": 0.25513461232185364,
      "learning_rate": 0.00016224095611115384,
      "loss": 0.9874,
      "step": 1550
    },
    {
      "epoch": 0.35879095523765575,
      "grad_norm": 0.24931229650974274,
      "learning_rate": 0.00016192520871331661,
      "loss": 0.9173,
      "step": 1555
    },
    {
      "epoch": 0.35994462390401477,
      "grad_norm": 0.25578710436820984,
      "learning_rate": 0.00016160845675189254,
      "loss": 0.9848,
      "step": 1560
    },
    {
      "epoch": 0.3610982925703738,
      "grad_norm": 0.26572901010513306,
      "learning_rate": 0.00016129070536529766,
      "loss": 0.9756,
      "step": 1565
    },
    {
      "epoch": 0.3622519612367328,
      "grad_norm": 0.28126901388168335,
      "learning_rate": 0.00016097195970816094,
      "loss": 0.9627,
      "step": 1570
    },
    {
      "epoch": 0.36340562990309183,
      "grad_norm": 0.27309858798980713,
      "learning_rate": 0.00016065222495124056,
      "loss": 0.9393,
      "step": 1575
    },
    {
      "epoch": 0.36455929856945085,
      "grad_norm": 0.2919960021972656,
      "learning_rate": 0.00016033150628134011,
      "loss": 0.9475,
      "step": 1580
    },
    {
      "epoch": 0.36571296723580987,
      "grad_norm": 0.2607177495956421,
      "learning_rate": 0.0001600098089012244,
      "loss": 0.9655,
      "step": 1585
    },
    {
      "epoch": 0.3668666359021689,
      "grad_norm": 0.26328402757644653,
      "learning_rate": 0.0001596871380295351,
      "loss": 0.9608,
      "step": 1590
    },
    {
      "epoch": 0.3680203045685279,
      "grad_norm": 0.27382150292396545,
      "learning_rate": 0.00015936349890070602,
      "loss": 0.9505,
      "step": 1595
    },
    {
      "epoch": 0.36917397323488693,
      "grad_norm": 0.26438233256340027,
      "learning_rate": 0.00015903889676487833,
      "loss": 0.9227,
      "step": 1600
    },
    {
      "epoch": 0.37032764190124595,
      "grad_norm": 0.2556706666946411,
      "learning_rate": 0.00015871333688781522,
      "loss": 0.9931,
      "step": 1605
    },
    {
      "epoch": 0.37148131056760497,
      "grad_norm": 0.27087247371673584,
      "learning_rate": 0.00015838682455081657,
      "loss": 0.9914,
      "step": 1610
    },
    {
      "epoch": 0.372634979233964,
      "grad_norm": 0.27084994316101074,
      "learning_rate": 0.00015805936505063322,
      "loss": 0.9641,
      "step": 1615
    },
    {
      "epoch": 0.373788647900323,
      "grad_norm": 0.2637239396572113,
      "learning_rate": 0.00015773096369938125,
      "loss": 0.96,
      "step": 1620
    },
    {
      "epoch": 0.37494231656668203,
      "grad_norm": 0.25956466794013977,
      "learning_rate": 0.00015740162582445546,
      "loss": 0.9617,
      "step": 1625
    },
    {
      "epoch": 0.37609598523304105,
      "grad_norm": 0.2576027810573578,
      "learning_rate": 0.0001570713567684432,
      "loss": 0.9539,
      "step": 1630
    },
    {
      "epoch": 0.3772496538994001,
      "grad_norm": 0.25418850779533386,
      "learning_rate": 0.00015674016188903762,
      "loss": 0.9145,
      "step": 1635
    },
    {
      "epoch": 0.3784033225657591,
      "grad_norm": 0.2608237564563751,
      "learning_rate": 0.00015640804655895084,
      "loss": 0.9819,
      "step": 1640
    },
    {
      "epoch": 0.3795569912321181,
      "grad_norm": 0.2650778889656067,
      "learning_rate": 0.00015607501616582664,
      "loss": 0.9296,
      "step": 1645
    },
    {
      "epoch": 0.38071065989847713,
      "grad_norm": 0.2612312138080597,
      "learning_rate": 0.00015574107611215319,
      "loss": 0.9691,
      "step": 1650
    },
    {
      "epoch": 0.38186432856483615,
      "grad_norm": 0.2598936855792999,
      "learning_rate": 0.00015540623181517531,
      "loss": 0.9305,
      "step": 1655
    },
    {
      "epoch": 0.3830179972311952,
      "grad_norm": 0.26123687624931335,
      "learning_rate": 0.00015507048870680668,
      "loss": 0.9859,
      "step": 1660
    },
    {
      "epoch": 0.38417166589755425,
      "grad_norm": 0.26335862278938293,
      "learning_rate": 0.00015473385223354168,
      "loss": 0.9645,
      "step": 1665
    },
    {
      "epoch": 0.38532533456391327,
      "grad_norm": 0.26434382796287537,
      "learning_rate": 0.00015439632785636706,
      "loss": 0.9682,
      "step": 1670
    },
    {
      "epoch": 0.3864790032302723,
      "grad_norm": 0.2613257169723511,
      "learning_rate": 0.00015405792105067332,
      "loss": 0.9602,
      "step": 1675
    },
    {
      "epoch": 0.3876326718966313,
      "grad_norm": 0.2707611918449402,
      "learning_rate": 0.00015371863730616586,
      "loss": 0.9942,
      "step": 1680
    },
    {
      "epoch": 0.38878634056299033,
      "grad_norm": 0.25917673110961914,
      "learning_rate": 0.00015337848212677597,
      "loss": 0.9372,
      "step": 1685
    },
    {
      "epoch": 0.38994000922934935,
      "grad_norm": 0.27119165658950806,
      "learning_rate": 0.00015303746103057162,
      "loss": 0.9884,
      "step": 1690
    },
    {
      "epoch": 0.39109367789570837,
      "grad_norm": 0.26019906997680664,
      "learning_rate": 0.00015269557954966778,
      "loss": 0.9684,
      "step": 1695
    },
    {
      "epoch": 0.3922473465620674,
      "grad_norm": 0.2654115557670593,
      "learning_rate": 0.00015235284323013675,
      "loss": 0.955,
      "step": 1700
    },
    {
      "epoch": 0.3934010152284264,
      "grad_norm": 0.26324862241744995,
      "learning_rate": 0.0001520092576319183,
      "loss": 0.9464,
      "step": 1705
    },
    {
      "epoch": 0.39455468389478543,
      "grad_norm": 0.270831823348999,
      "learning_rate": 0.00015166482832872923,
      "loss": 0.9716,
      "step": 1710
    },
    {
      "epoch": 0.39570835256114445,
      "grad_norm": 0.27920085191726685,
      "learning_rate": 0.00015131956090797325,
      "loss": 0.9674,
      "step": 1715
    },
    {
      "epoch": 0.39686202122750347,
      "grad_norm": 0.27237191796302795,
      "learning_rate": 0.00015097346097065007,
      "loss": 1.0088,
      "step": 1720
    },
    {
      "epoch": 0.3980156898938625,
      "grad_norm": 0.27462947368621826,
      "learning_rate": 0.0001506265341312647,
      "loss": 0.9321,
      "step": 1725
    },
    {
      "epoch": 0.3991693585602215,
      "grad_norm": 0.26354286074638367,
      "learning_rate": 0.00015027878601773633,
      "loss": 0.9591,
      "step": 1730
    },
    {
      "epoch": 0.40032302722658053,
      "grad_norm": 0.2659231126308441,
      "learning_rate": 0.000149930222271307,
      "loss": 0.9717,
      "step": 1735
    },
    {
      "epoch": 0.40147669589293955,
      "grad_norm": 0.2720133662223816,
      "learning_rate": 0.0001495808485464502,
      "loss": 0.9727,
      "step": 1740
    },
    {
      "epoch": 0.40263036455929857,
      "grad_norm": 0.25452399253845215,
      "learning_rate": 0.00014923067051077893,
      "loss": 0.9862,
      "step": 1745
    },
    {
      "epoch": 0.4037840332256576,
      "grad_norm": 0.2635675072669983,
      "learning_rate": 0.00014887969384495402,
      "loss": 0.9508,
      "step": 1750
    },
    {
      "epoch": 0.4049377018920166,
      "grad_norm": 0.2562003433704376,
      "learning_rate": 0.0001485279242425917,
      "loss": 0.9942,
      "step": 1755
    },
    {
      "epoch": 0.40609137055837563,
      "grad_norm": 0.26377397775650024,
      "learning_rate": 0.00014817536741017152,
      "loss": 0.9425,
      "step": 1760
    },
    {
      "epoch": 0.40724503922473465,
      "grad_norm": 0.2801288664340973,
      "learning_rate": 0.0001478220290669436,
      "loss": 0.9897,
      "step": 1765
    },
    {
      "epoch": 0.40839870789109367,
      "grad_norm": 0.27296552062034607,
      "learning_rate": 0.00014746791494483583,
      "loss": 0.9771,
      "step": 1770
    },
    {
      "epoch": 0.4095523765574527,
      "grad_norm": 0.27594876289367676,
      "learning_rate": 0.000147113030788361,
      "loss": 0.9926,
      "step": 1775
    },
    {
      "epoch": 0.4107060452238117,
      "grad_norm": 0.2638562321662903,
      "learning_rate": 0.00014675738235452352,
      "loss": 0.9543,
      "step": 1780
    },
    {
      "epoch": 0.41185971389017073,
      "grad_norm": 0.2608337998390198,
      "learning_rate": 0.0001464009754127261,
      "loss": 0.9612,
      "step": 1785
    },
    {
      "epoch": 0.41301338255652975,
      "grad_norm": 0.2768416702747345,
      "learning_rate": 0.00014604381574467615,
      "loss": 0.9678,
      "step": 1790
    },
    {
      "epoch": 0.41416705122288877,
      "grad_norm": 0.25971782207489014,
      "learning_rate": 0.0001456859091442919,
      "loss": 0.9335,
      "step": 1795
    },
{ |
|
"epoch": 0.4153207198892478, |
|
"grad_norm": 0.25914978981018066, |
|
"learning_rate": 0.00014532726141760848, |
|
"loss": 1.0066, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.4164743885556068, |
|
"grad_norm": 0.25939711928367615, |
|
"learning_rate": 0.00014496787838268378, |
|
"loss": 0.9687, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 0.41762805722196583, |
|
"grad_norm": 0.2599487900733948, |
|
"learning_rate": 0.00014460776586950393, |
|
"loss": 0.9776, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.41878172588832485, |
|
"grad_norm": 0.25991660356521606, |
|
"learning_rate": 0.00014424692971988886, |
|
"loss": 0.9543, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 0.41993539455468387, |
|
"grad_norm": 0.2667793035507202, |
|
"learning_rate": 0.0001438853757873975, |
|
"loss": 0.944, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.4210890632210429, |
|
"grad_norm": 0.2818945348262787, |
|
"learning_rate": 0.00014352310993723277, |
|
"loss": 0.9587, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.4222427318874019, |
|
"grad_norm": 0.271699994802475, |
|
"learning_rate": 0.00014316013804614643, |
|
"loss": 0.9647, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.423396400553761, |
|
"grad_norm": 0.2668125331401825, |
|
"learning_rate": 0.00014279646600234386, |
|
"loss": 0.9732, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 0.42455006922012, |
|
"grad_norm": 0.25676923990249634, |
|
"learning_rate": 0.00014243209970538846, |
|
"loss": 0.9776, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.42570373788647903, |
|
"grad_norm": 0.2696235775947571, |
|
"learning_rate": 0.00014206704506610583, |
|
"loss": 0.9644, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.42685740655283805, |
|
"grad_norm": 0.25627243518829346, |
|
"learning_rate": 0.00014170130800648814, |
|
"loss": 0.9586, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.42801107521919707, |
|
"grad_norm": 0.2514009475708008, |
|
"learning_rate": 0.00014133489445959787, |
|
"loss": 0.9311, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.4291647438855561, |
|
"grad_norm": 0.24791298806667328, |
|
"learning_rate": 0.00014096781036947157, |
|
"loss": 0.9819, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.4303184125519151, |
|
"grad_norm": 0.2555059492588043, |
|
"learning_rate": 0.00014060006169102363, |
|
"loss": 0.9501, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 0.43147208121827413, |
|
"grad_norm": 0.25847703218460083, |
|
"learning_rate": 0.0001402316543899493, |
|
"loss": 0.9871, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.43262574988463315, |
|
"grad_norm": 0.2684321701526642, |
|
"learning_rate": 0.0001398625944426284, |
|
"loss": 0.9871, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.43377941855099217, |
|
"grad_norm": 0.2601282000541687, |
|
"learning_rate": 0.0001394928878360279, |
|
"loss": 1.0022, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.4349330872173512, |
|
"grad_norm": 0.26379039883613586, |
|
"learning_rate": 0.0001391225405676051, |
|
"loss": 0.9693, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 0.4360867558837102, |
|
"grad_norm": 0.2606624662876129, |
|
"learning_rate": 0.0001387515586452103, |
|
"loss": 0.9585, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.43724042455006923, |
|
"grad_norm": 0.269196480512619, |
|
"learning_rate": 0.0001383799480869892, |
|
"loss": 0.9539, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 0.43839409321642825, |
|
"grad_norm": 0.26253777742385864, |
|
"learning_rate": 0.00013800771492128536, |
|
"loss": 0.9294, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.43954776188278727, |
|
"grad_norm": 0.2613804340362549, |
|
"learning_rate": 0.00013763486518654253, |
|
"loss": 0.9316, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 0.4407014305491463, |
|
"grad_norm": 0.25759026408195496, |
|
"learning_rate": 0.0001372614049312064, |
|
"loss": 0.9558, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.4418550992155053, |
|
"grad_norm": 0.25641101598739624, |
|
"learning_rate": 0.00013688734021362675, |
|
"loss": 0.9405, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 0.44300876788186433, |
|
"grad_norm": 0.26334041357040405, |
|
"learning_rate": 0.00013651267710195907, |
|
"loss": 0.9624, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.44416243654822335, |
|
"grad_norm": 0.2584952712059021, |
|
"learning_rate": 0.00013613742167406612, |
|
"loss": 0.9953, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.44531610521458237, |
|
"grad_norm": 0.2532113790512085, |
|
"learning_rate": 0.00013576158001741932, |
|
"loss": 0.942, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.4464697738809414, |
|
"grad_norm": 0.25842392444610596, |
|
"learning_rate": 0.00013538515822900002, |
|
"loss": 0.941, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.4476234425473004, |
|
"grad_norm": 0.27943527698516846, |
|
"learning_rate": 0.00013500816241520058, |
|
"loss": 0.9809, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.44877711121365943, |
|
"grad_norm": 0.2741059362888336, |
|
"learning_rate": 0.00013463059869172535, |
|
"loss": 0.9559, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 0.44993077988001845, |
|
"grad_norm": 0.27189043164253235, |
|
"learning_rate": 0.00013425247318349137, |
|
"loss": 0.9496, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.45108444854637747, |
|
"grad_norm": 0.2595981955528259, |
|
"learning_rate": 0.00013387379202452917, |
|
"loss": 0.9404, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 0.4522381172127365, |
|
"grad_norm": 0.2670726180076599, |
|
"learning_rate": 0.00013349456135788298, |
|
"loss": 0.9933, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.4533917858790955, |
|
"grad_norm": 0.26899731159210205, |
|
"learning_rate": 0.0001331147873355115, |
|
"loss": 0.9498, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 0.45454545454545453, |
|
"grad_norm": 0.25968730449676514, |
|
"learning_rate": 0.00013273447611818767, |
|
"loss": 0.9623, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.45569912321181355, |
|
"grad_norm": 0.2737217843532562, |
|
"learning_rate": 0.000132353633875399, |
|
"loss": 0.9661, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.45685279187817257, |
|
"grad_norm": 0.2898109555244446, |
|
"learning_rate": 0.00013197226678524738, |
|
"loss": 0.9716, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.4580064605445316, |
|
"grad_norm": 0.27403295040130615, |
|
"learning_rate": 0.00013159038103434888, |
|
"loss": 0.9601, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 0.4591601292108906, |
|
"grad_norm": 0.2780100405216217, |
|
"learning_rate": 0.00013120798281773347, |
|
"loss": 0.9191, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.46031379787724963, |
|
"grad_norm": 0.25929951667785645, |
|
"learning_rate": 0.00013082507833874426, |
|
"loss": 0.9445, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 0.46146746654360865, |
|
"grad_norm": 0.2623712420463562, |
|
"learning_rate": 0.00013044167380893727, |
|
"loss": 0.9961, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.46262113520996767, |
|
"grad_norm": 0.27212023735046387, |
|
"learning_rate": 0.00013005777544798026, |
|
"loss": 0.9497, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 0.46377480387632675, |
|
"grad_norm": 0.2612648606300354, |
|
"learning_rate": 0.00012967338948355217, |
|
"loss": 0.9477, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.46492847254268577, |
|
"grad_norm": 0.27296924591064453, |
|
"learning_rate": 0.0001292885221512419, |
|
"loss": 0.9788, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 0.4660821412090448, |
|
"grad_norm": 0.24790716171264648, |
|
"learning_rate": 0.00012890317969444716, |
|
"loss": 0.942, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.4672358098754038, |
|
"grad_norm": 0.27568113803863525, |
|
"learning_rate": 0.00012851736836427325, |
|
"loss": 0.9746, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.4683894785417628, |
|
"grad_norm": 0.2629447281360626, |
|
"learning_rate": 0.00012813109441943166, |
|
"loss": 0.9481, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.46954314720812185, |
|
"grad_norm": 0.264850914478302, |
|
"learning_rate": 0.00012774436412613845, |
|
"loss": 0.9723, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 0.47069681587448087, |
|
"grad_norm": 0.27910053730010986, |
|
"learning_rate": 0.0001273571837580127, |
|
"loss": 0.9671, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.4718504845408399, |
|
"grad_norm": 0.26526764035224915, |
|
"learning_rate": 0.0001269695595959747, |
|
"loss": 0.9891, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 0.4730041532071989, |
|
"grad_norm": 0.27195367217063904, |
|
"learning_rate": 0.00012658149792814404, |
|
"loss": 0.9624, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.4741578218735579, |
|
"grad_norm": 0.2728367745876312, |
|
"learning_rate": 0.00012619300504973762, |
|
"loss": 0.9487, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 0.47531149053991695, |
|
"grad_norm": 0.2927708625793457, |
|
"learning_rate": 0.0001258040872629676, |
|
"loss": 0.9206, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.47646515920627597, |
|
"grad_norm": 0.2730552852153778, |
|
"learning_rate": 0.00012541475087693896, |
|
"loss": 0.9995, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 0.477618827872635, |
|
"grad_norm": 0.26003938913345337, |
|
"learning_rate": 0.00012502500220754737, |
|
"loss": 0.9627, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.478772496538994, |
|
"grad_norm": 0.26204952597618103, |
|
"learning_rate": 0.00012463484757737662, |
|
"loss": 0.959, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.47992616520535303, |
|
"grad_norm": 0.25478288531303406, |
|
"learning_rate": 0.0001242442933155961, |
|
"loss": 0.9581, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.48107983387171205, |
|
"grad_norm": 0.2646753489971161, |
|
"learning_rate": 0.0001238533457578581, |
|
"loss": 0.9405, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 0.48223350253807107, |
|
"grad_norm": 0.2728932201862335, |
|
"learning_rate": 0.00012346201124619502, |
|
"loss": 0.9375, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.4833871712044301, |
|
"grad_norm": 0.26134753227233887, |
|
"learning_rate": 0.00012307029612891655, |
|
"loss": 0.9525, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 0.4845408398707891, |
|
"grad_norm": 0.2668938636779785, |
|
"learning_rate": 0.00012267820676050656, |
|
"loss": 0.9239, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.48569450853714813, |
|
"grad_norm": 0.25549912452697754, |
|
"learning_rate": 0.00012228574950152017, |
|
"loss": 0.9374, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 0.48684817720350715, |
|
"grad_norm": 0.5779925584793091, |
|
"learning_rate": 0.00012189293071848051, |
|
"loss": 0.9765, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.48800184586986617, |
|
"grad_norm": 0.25671547651290894, |
|
"learning_rate": 0.00012149975678377541, |
|
"loss": 1.0103, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 0.4891555145362252, |
|
"grad_norm": 0.2744820713996887, |
|
"learning_rate": 0.00012110623407555397, |
|
"loss": 0.9772, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.4903091832025842, |
|
"grad_norm": 0.2585495114326477, |
|
"learning_rate": 0.00012071236897762325, |
|
"loss": 0.9565, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.49146285186894323, |
|
"grad_norm": 0.2611427307128906, |
|
"learning_rate": 0.00012031816787934464, |
|
"loss": 0.9505, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.49261652053530225, |
|
"grad_norm": 0.2629440426826477, |
|
"learning_rate": 0.00011992363717553015, |
|
"loss": 0.9399, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 0.49377018920166127, |
|
"grad_norm": 0.25579625368118286, |
|
"learning_rate": 0.00011952878326633872, |
|
"loss": 0.8824, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.4949238578680203, |
|
"grad_norm": 0.26876822113990784, |
|
"learning_rate": 0.00011913361255717241, |
|
"loss": 0.949, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 0.4960775265343793, |
|
"grad_norm": 0.2676302194595337, |
|
"learning_rate": 0.00011873813145857249, |
|
"loss": 0.9855, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.49723119520073833, |
|
"grad_norm": 0.26593196392059326, |
|
"learning_rate": 0.00011834234638611539, |
|
"loss": 0.9051, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 0.49838486386709735, |
|
"grad_norm": 0.2615763247013092, |
|
"learning_rate": 0.00011794626376030866, |
|
"loss": 0.9362, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.49953853253345637, |
|
"grad_norm": 0.260310560464859, |
|
"learning_rate": 0.00011754989000648693, |
|
"loss": 0.9614, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 0.5006922011998154, |
|
"grad_norm": 0.26325076818466187, |
|
"learning_rate": 0.00011715323155470745, |
|
"loss": 0.9321, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.5018458698661744, |
|
"grad_norm": 0.2704361081123352, |
|
"learning_rate": 0.00011675629483964596, |
|
"loss": 0.933, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.5029995385325334, |
|
"grad_norm": 0.2598739266395569, |
|
"learning_rate": 0.0001163590863004922, |
|
"loss": 0.9706, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.5041532071988925, |
|
"grad_norm": 0.2715523838996887, |
|
"learning_rate": 0.0001159616123808455, |
|
"loss": 0.9422, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 0.5053068758652515, |
|
"grad_norm": 0.2593281865119934, |
|
"learning_rate": 0.00011556387952861036, |
|
"loss": 1.0014, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.5064605445316105, |
|
"grad_norm": 0.26070570945739746, |
|
"learning_rate": 0.0001151658941958916, |
|
"loss": 0.9399, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 0.5076142131979695, |
|
"grad_norm": 0.2727888524532318, |
|
"learning_rate": 0.00011476766283888986, |
|
"loss": 0.9557, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.5087678818643285, |
|
"grad_norm": 0.2623576819896698, |
|
"learning_rate": 0.00011436919191779687, |
|
"loss": 0.976, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.5099215505306876, |
|
"grad_norm": 0.26719269156455994, |
|
"learning_rate": 0.0001139704878966906, |
|
"loss": 0.9823, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.5110752191970466, |
|
"grad_norm": 0.2678567171096802, |
|
"learning_rate": 0.00011357155724343045, |
|
"loss": 0.9361, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 0.5122288878634056, |
|
"grad_norm": 0.2595786154270172, |
|
"learning_rate": 0.00011317240642955225, |
|
"loss": 0.9625, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.5133825565297646, |
|
"grad_norm": 0.2738422751426697, |
|
"learning_rate": 0.00011277304193016332, |
|
"loss": 0.985, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.5145362251961236, |
|
"grad_norm": 0.2674945890903473, |
|
"learning_rate": 0.00011237347022383746, |
|
"loss": 0.9513, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.5156898938624827, |
|
"grad_norm": 0.2577970325946808, |
|
"learning_rate": 0.00011197369779250979, |
|
"loss": 0.9865, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 0.5168435625288417, |
|
"grad_norm": 0.27681398391723633, |
|
"learning_rate": 0.00011157373112137171, |
|
"loss": 0.9499, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.5179972311952007, |
|
"grad_norm": 0.27841857075691223, |
|
"learning_rate": 0.0001111735766987655, |
|
"loss": 0.9625, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 0.5191508998615597, |
|
"grad_norm": 0.26672080159187317, |
|
"learning_rate": 0.00011077324101607929, |
|
"loss": 0.9517, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.5203045685279187, |
|
"grad_norm": 0.2566830813884735, |
|
"learning_rate": 0.00011037273056764157, |
|
"loss": 0.9508, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 0.5214582371942778, |
|
"grad_norm": 0.2679496705532074, |
|
"learning_rate": 0.00010997205185061599, |
|
"loss": 0.9332, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.5226119058606368, |
|
"grad_norm": 0.2658250629901886, |
|
"learning_rate": 0.00010957121136489581, |
|
"loss": 0.95, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 0.5237655745269958, |
|
"grad_norm": 0.2708585560321808, |
|
"learning_rate": 0.00010917021561299863, |
|
"loss": 0.9577, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.5249192431933549, |
|
"grad_norm": 0.25992119312286377, |
|
"learning_rate": 0.0001087690710999607, |
|
"loss": 0.969, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.5260729118597139, |
|
"grad_norm": 0.2593708336353302, |
|
"learning_rate": 0.00010836778433323158, |
|
"loss": 0.9516, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.527226580526073, |
|
"grad_norm": 0.267589807510376, |
|
"learning_rate": 0.00010796636182256845, |
|
"loss": 0.9257, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 0.528380249192432, |
|
"grad_norm": 0.26011785864830017, |
|
"learning_rate": 0.00010756481007993063, |
|
"loss": 0.994, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.529533917858791, |
|
"grad_norm": 0.27399560809135437, |
|
"learning_rate": 0.0001071631356193738, |
|
"loss": 0.9628, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 0.53068758652515, |
|
"grad_norm": 0.25914278626441956, |
|
"learning_rate": 0.00010676134495694439, |
|
"loss": 0.9675, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.531841255191509, |
|
"grad_norm": 0.26276910305023193, |
|
"learning_rate": 0.00010635944461057395, |
|
"loss": 0.9666, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 0.5329949238578681, |
|
"grad_norm": 0.26222512125968933, |
|
"learning_rate": 0.00010595744109997325, |
|
"loss": 0.9434, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.5341485925242271, |
|
"grad_norm": 0.269531786441803, |
|
"learning_rate": 0.00010555534094652675, |
|
"loss": 0.9613, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 0.5353022611905861, |
|
"grad_norm": 0.2551771104335785, |
|
"learning_rate": 0.00010515315067318652, |
|
"loss": 0.9855, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.5364559298569451, |
|
"grad_norm": 0.25686752796173096, |
|
"learning_rate": 0.00010475087680436666, |
|
"loss": 0.9563, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.5376095985233041, |
|
"grad_norm": 0.2565601170063019, |
|
"learning_rate": 0.00010434852586583736, |
|
"loss": 0.966, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.5387632671896632, |
|
"grad_norm": 0.2918190360069275, |
|
"learning_rate": 0.000103946104384619, |
|
"loss": 0.9314, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 0.5399169358560222, |
|
"grad_norm": 0.2644803822040558, |
|
"learning_rate": 0.00010354361888887642, |
|
"loss": 0.9581, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.5410706045223812, |
|
"grad_norm": 0.2727797031402588, |
|
"learning_rate": 0.00010314107590781284, |
|
"loss": 0.9633, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 0.5422242731887402, |
|
"grad_norm": 0.273027241230011, |
|
"learning_rate": 0.00010273848197156401, |
|
"loss": 0.9487, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.5433779418550992, |
|
"grad_norm": 0.2540397644042969, |
|
"learning_rate": 0.00010233584361109235, |
|
"loss": 0.9735, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 0.5445316105214583, |
|
"grad_norm": 0.2538515031337738, |
|
"learning_rate": 0.00010193316735808085, |
|
"loss": 0.9788, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.5456852791878173, |
|
"grad_norm": 0.27701374888420105, |
|
"learning_rate": 0.00010153045974482732, |
|
"loss": 0.9557, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 0.5468389478541763, |
|
"grad_norm": 0.25808069109916687, |
|
"learning_rate": 0.00010112772730413815, |
|
"loss": 0.9243, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.5479926165205353, |
|
"grad_norm": 0.256411075592041, |
|
"learning_rate": 0.00010072497656922266, |
|
"loss": 0.9752, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.5491462851868943, |
|
"grad_norm": 0.2654629647731781, |
|
"learning_rate": 0.00010032221407358681, |
|
"loss": 0.9949, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.5502999538532534, |
|
"grad_norm": 0.2696619927883148, |
|
"learning_rate": 9.99194463509274e-05, |
|
"loss": 0.971, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 0.5514536225196124, |
|
"grad_norm": 0.2578607499599457, |
|
"learning_rate": 9.9516679935026e-05, |
|
"loss": 0.9533, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.5526072911859714, |
|
"grad_norm": 0.26021629571914673, |
|
"learning_rate": 9.911392135964298e-05, |
|
"loss": 0.9677, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 0.5537609598523304, |
|
"grad_norm": 0.27699634432792664, |
|
"learning_rate": 9.871117715841151e-05, |
|
"loss": 0.9434, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.5549146285186894, |
|
"grad_norm": 0.25480917096138, |
|
"learning_rate": 9.830845386473168e-05, |
|
"loss": 0.9836, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 0.5560682971850485, |
|
"grad_norm": 0.2536785304546356, |
|
"learning_rate": 9.790575801166432e-05, |
|
"loss": 0.9518, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.5572219658514075, |
|
"grad_norm": 0.2608122229576111, |
|
"learning_rate": 9.750309613182505e-05, |
|
"loss": 0.9649, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 0.5583756345177665, |
|
"grad_norm": 0.25869712233543396, |
|
"learning_rate": 9.710047475727855e-05, |
|
"loss": 0.9668, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.5595293031841255, |
|
"grad_norm": 0.25615230202674866, |
|
"learning_rate": 9.669790041943225e-05, |
|
"loss": 0.9062, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.5606829718504845, |
|
"grad_norm": 0.26828473806381226, |
|
"learning_rate": 9.629537964893063e-05, |
|
"loss": 0.9787, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.5618366405168436, |
|
"grad_norm": 0.25551798939704895, |
|
"learning_rate": 9.589291897554912e-05, |
|
"loss": 0.9597, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 0.5629903091832026, |
|
"grad_norm": 0.26706936955451965, |
|
"learning_rate": 9.549052492808834e-05, |
|
"loss": 0.9728, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.5641439778495616, |
|
"grad_norm": 0.25929680466651917, |
|
"learning_rate": 9.508820403426799e-05, |
|
"loss": 0.9705, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 0.5652976465159206, |
|
"grad_norm": 2.2861907482147217, |
|
"learning_rate": 9.468596282062114e-05, |
|
"loss": 0.9766, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.5664513151822796, |
|
"grad_norm": 0.2641540467739105, |
|
"learning_rate": 9.428380781238821e-05, |
|
"loss": 1.0021, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 0.5676049838486387, |
|
"grad_norm": 0.25216275453567505, |
|
"learning_rate": 9.38817455334112e-05, |
|
"loss": 0.9563, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.5687586525149977, |
|
"grad_norm": 0.262725293636322, |
|
"learning_rate": 9.347978250602785e-05, |
|
"loss": 0.9765, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 0.5699123211813567, |
|
"grad_norm": 0.2671465277671814, |
|
"learning_rate": 9.307792525096581e-05, |
|
"loss": 0.9308, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.5710659898477157, |
|
"grad_norm": 0.26376160979270935, |
|
"learning_rate": 9.267618028723686e-05, |
|
"loss": 0.9807, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.5722196585140747, |
|
"grad_norm": 0.24652628600597382, |
|
"learning_rate": 9.227455413203115e-05, |
|
"loss": 0.9261, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.5733733271804338, |
|
"grad_norm": 0.2701497972011566, |
|
"learning_rate": 9.187305330061156e-05, |
|
"loss": 0.9542, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 0.5745269958467928, |
|
"grad_norm": 0.28991737961769104, |
|
"learning_rate": 9.147168430620787e-05, |
|
"loss": 0.9431, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.5756806645131518, |
|
"grad_norm": 0.2657528817653656, |
|
"learning_rate": 9.107045365991123e-05, |
|
"loss": 0.9601, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 0.5768343331795108, |
|
"grad_norm": 0.25293365120887756, |
|
"learning_rate": 9.066936787056842e-05, |
|
"loss": 0.9528, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.5779880018458698, |
|
"grad_norm": 0.26489248871803284, |
|
"learning_rate": 9.026843344467635e-05, |
|
"loss": 0.9509, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 0.5791416705122289, |
|
"grad_norm": 0.27219951152801514, |
|
"learning_rate": 8.986765688627652e-05, |
|
"loss": 0.9485, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.5802953391785879, |
|
"grad_norm": 0.2632032334804535, |
|
"learning_rate": 8.946704469684939e-05, |
|
"loss": 0.9635, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 0.5814490078449469, |
|
"grad_norm": 0.25340795516967773, |
|
"learning_rate": 8.906660337520903e-05, |
|
"loss": 0.9315, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.5826026765113059, |
|
"grad_norm": 0.26531898975372314, |
|
"learning_rate": 8.86663394173977e-05, |
|
"loss": 0.974, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.583756345177665, |
|
"grad_norm": 0.26169174909591675, |
|
"learning_rate": 8.826625931658039e-05, |
|
"loss": 0.9416, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.584910013844024, |
|
"grad_norm": 0.27198314666748047, |
|
"learning_rate": 8.786636956293948e-05, |
|
"loss": 0.9827, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 0.586063682510383, |
|
"grad_norm": 0.2525466978549957, |
|
"learning_rate": 8.746667664356956e-05, |
|
"loss": 0.9506, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.587217351176742, |
|
"grad_norm": 0.2605235278606415, |
|
"learning_rate": 8.706718704237215e-05, |
|
"loss": 0.9747, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 0.588371019843101, |
|
"grad_norm": 0.263271301984787, |
|
"learning_rate": 8.666790723995042e-05, |
|
"loss": 0.9495, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.58952468850946, |
|
"grad_norm": 0.2623215317726135, |
|
"learning_rate": 8.626884371350421e-05, |
|
"loss": 0.9407, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 0.5906783571758191, |
|
"grad_norm": 0.2711637616157532, |
|
"learning_rate": 8.587000293672481e-05, |
|
"loss": 0.9332, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.5918320258421781, |
|
"grad_norm": 0.27216318249702454, |
|
"learning_rate": 8.547139137969015e-05, |
|
"loss": 0.9789, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 0.5929856945085371, |
|
"grad_norm": 0.2510489821434021, |
|
"learning_rate": 8.50730155087596e-05, |
|
"loss": 0.9522, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.5941393631748961, |
|
"grad_norm": 0.27893054485321045, |
|
"learning_rate": 8.46748817864692e-05, |
|
"loss": 0.9849, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.5952930318412551, |
|
"grad_norm": 0.2612435817718506, |
|
"learning_rate": 8.427699667142682e-05, |
|
"loss": 0.9512, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.5964467005076142, |
|
"grad_norm": 0.26378709077835083, |
|
"learning_rate": 8.387936661820734e-05, |
|
"loss": 0.8909, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 0.5976003691739732, |
|
"grad_norm": 0.2865258455276489, |
|
"learning_rate": 8.348199807724806e-05, |
|
"loss": 0.964, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.5987540378403322, |
|
"grad_norm": 0.26403477787971497, |
|
"learning_rate": 8.308489749474388e-05, |
|
"loss": 0.9232, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 0.5999077065066912, |
|
"grad_norm": 0.2571257948875427, |
|
"learning_rate": 8.268807131254287e-05, |
|
"loss": 0.935, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6010613751730502, |
|
"grad_norm": 0.2499091774225235, |
|
"learning_rate": 8.229152596804168e-05, |
|
"loss": 0.9262, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 0.6022150438394093, |
|
"grad_norm": 0.2728745639324188, |
|
"learning_rate": 8.189526789408123e-05, |
|
"loss": 0.984, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.6033687125057683, |
|
"grad_norm": 0.27502092719078064, |
|
"learning_rate": 8.149930351884221e-05, |
|
"loss": 0.9326, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 0.6045223811721273, |
|
"grad_norm": 0.26331180334091187, |
|
"learning_rate": 8.110363926574087e-05, |
|
"loss": 0.9026, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.6056760498384864, |
|
"grad_norm": 0.25487685203552246, |
|
"learning_rate": 8.070828155332486e-05, |
|
"loss": 0.9307, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.6068297185048455, |
|
"grad_norm": 0.2543911933898926, |
|
"learning_rate": 8.0313236795169e-05, |
|
"loss": 0.9647, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.6079833871712045, |
|
"grad_norm": 0.2619852125644684, |
|
"learning_rate": 7.991851139977138e-05, |
|
"loss": 0.9564, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 0.6091370558375635, |
|
"grad_norm": 0.2640511989593506, |
|
"learning_rate": 7.952411177044923e-05, |
|
"loss": 0.9414, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.6102907245039225, |
|
"grad_norm": 0.27362698316574097, |
|
"learning_rate": 7.913004430523526e-05, |
|
"loss": 0.9517, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 0.6114443931702815, |
|
"grad_norm": 0.3130943477153778, |
|
"learning_rate": 7.873631539677364e-05, |
|
"loss": 0.9368, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.6125980618366406, |
|
"grad_norm": 0.2534888684749603, |
|
"learning_rate": 7.834293143221642e-05, |
|
"loss": 0.9621, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 0.6137517305029996, |
|
"grad_norm": 0.2623361349105835, |
|
"learning_rate": 7.794989879311991e-05, |
|
"loss": 0.9424, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.6149053991693586, |
|
"grad_norm": 0.2610420882701874, |
|
"learning_rate": 7.755722385534111e-05, |
|
"loss": 0.9385, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 0.6160590678357176, |
|
"grad_norm": 0.27034202218055725, |
|
"learning_rate": 7.716491298893442e-05, |
|
"loss": 0.9798, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.6172127365020766, |
|
"grad_norm": 0.2572305202484131, |
|
"learning_rate": 7.677297255804811e-05, |
|
"loss": 0.9163, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.6183664051684357, |
|
"grad_norm": 0.2526620626449585, |
|
"learning_rate": 7.638140892082117e-05, |
|
"loss": 0.9339, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.6195200738347947, |
|
"grad_norm": 0.26071593165397644, |
|
"learning_rate": 7.599022842928017e-05, |
|
"loss": 0.9419, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 0.6206737425011537, |
|
"grad_norm": 0.25418540835380554, |
|
"learning_rate": 7.559943742923626e-05, |
|
"loss": 0.9372, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.6218274111675127, |
|
"grad_norm": 0.2598932385444641, |
|
"learning_rate": 7.520904226018213e-05, |
|
"loss": 0.9561, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 0.6229810798338717, |
|
"grad_norm": 0.27341189980506897, |
|
"learning_rate": 7.48190492551892e-05, |
|
"loss": 0.9753, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.6241347485002308, |
|
"grad_norm": 0.2658282518386841, |
|
"learning_rate": 7.442946474080499e-05, |
|
"loss": 0.9653, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 0.6252884171665898, |
|
"grad_norm": 0.2684546411037445, |
|
"learning_rate": 7.404029503695028e-05, |
|
"loss": 0.9748, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.6264420858329488, |
|
"grad_norm": 0.28556227684020996, |
|
"learning_rate": 7.365154645681681e-05, |
|
"loss": 0.9599, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 0.6275957544993078, |
|
"grad_norm": 0.2757733166217804, |
|
"learning_rate": 7.32632253067647e-05, |
|
"loss": 0.9678, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.6287494231656668, |
|
"grad_norm": 0.25145432353019714, |
|
"learning_rate": 7.287533788622025e-05, |
|
"loss": 0.9201, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.6299030918320259, |
|
"grad_norm": 0.2673650085926056, |
|
"learning_rate": 7.248789048757368e-05, |
|
"loss": 0.9538, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.6310567604983849, |
|
"grad_norm": 0.2711561918258667, |
|
"learning_rate": 7.210088939607708e-05, |
|
"loss": 0.9657, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 0.6322104291647439, |
|
"grad_norm": 0.2643033266067505, |
|
"learning_rate": 7.171434088974251e-05, |
|
"loss": 0.9253, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.6333640978311029, |
|
"grad_norm": 0.2604006826877594, |
|
"learning_rate": 7.132825123924006e-05, |
|
"loss": 0.9299, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 0.6345177664974619, |
|
"grad_norm": 0.26541584730148315, |
|
"learning_rate": 7.094262670779612e-05, |
|
"loss": 0.9345, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.635671435163821, |
|
"grad_norm": 0.2622869610786438, |
|
"learning_rate": 7.055747355109186e-05, |
|
"loss": 0.9559, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 0.63682510383018, |
|
"grad_norm": 0.27132776379585266, |
|
"learning_rate": 7.017279801716177e-05, |
|
"loss": 0.9611, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.637978772496539, |
|
"grad_norm": 0.25656870007514954, |
|
"learning_rate": 6.978860634629214e-05, |
|
"loss": 0.9387, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 0.639132441162898, |
|
"grad_norm": 0.26165375113487244, |
|
"learning_rate": 6.940490477092004e-05, |
|
"loss": 0.9169, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.640286109829257, |
|
"grad_norm": 0.24951021373271942, |
|
"learning_rate": 6.902169951553202e-05, |
|
"loss": 0.9349, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 0.6414397784956161, |
|
"grad_norm": 0.2729644775390625, |
|
"learning_rate": 6.863899679656328e-05, |
|
"loss": 0.9506, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.6425934471619751, |
|
"grad_norm": 0.2613593339920044, |
|
"learning_rate": 6.82568028222967e-05, |
|
"loss": 0.9612, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 0.6437471158283341, |
|
"grad_norm": 0.254482626914978, |
|
"learning_rate": 6.787512379276229e-05, |
|
"loss": 0.9336, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.6449007844946931, |
|
"grad_norm": 0.2548394501209259, |
|
"learning_rate": 6.749396589963648e-05, |
|
"loss": 0.9495, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 0.6460544531610521, |
|
"grad_norm": 0.25152695178985596, |
|
"learning_rate": 6.711333532614168e-05, |
|
"loss": 0.9476, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.6472081218274112, |
|
"grad_norm": 0.2617679536342621, |
|
"learning_rate": 6.673323824694606e-05, |
|
"loss": 0.9498, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 0.6483617904937702, |
|
"grad_norm": 0.26209336519241333, |
|
"learning_rate": 6.63536808280633e-05, |
|
"loss": 0.9377, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.6495154591601292, |
|
"grad_norm": 0.2582489848136902, |
|
"learning_rate": 6.597466922675266e-05, |
|
"loss": 0.9341, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 0.6506691278264882, |
|
"grad_norm": 0.25654134154319763, |
|
"learning_rate": 6.559620959141897e-05, |
|
"loss": 0.922, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.6518227964928472, |
|
"grad_norm": 0.25131335854530334, |
|
"learning_rate": 6.521830806151297e-05, |
|
"loss": 0.9427, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 0.6529764651592063, |
|
"grad_norm": 0.26254597306251526, |
|
"learning_rate": 6.48409707674317e-05, |
|
"loss": 0.9359, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.6541301338255653, |
|
"grad_norm": 0.2668085992336273, |
|
"learning_rate": 6.446420383041903e-05, |
|
"loss": 0.9273, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 0.6552838024919243, |
|
"grad_norm": 0.24858598411083221, |
|
"learning_rate": 6.408801336246645e-05, |
|
"loss": 0.9611, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.6564374711582833, |
|
"grad_norm": 0.2663392126560211, |
|
"learning_rate": 6.371240546621378e-05, |
|
"loss": 0.9497, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 0.6575911398246423, |
|
"grad_norm": 0.26316505670547485, |
|
"learning_rate": 6.333738623485025e-05, |
|
"loss": 0.9674, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.6587448084910014, |
|
"grad_norm": 0.26178088784217834, |
|
"learning_rate": 6.296296175201564e-05, |
|
"loss": 0.983, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 0.6598984771573604, |
|
"grad_norm": 0.25734785199165344, |
|
"learning_rate": 6.258913809170168e-05, |
|
"loss": 0.9689, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.6610521458237194, |
|
"grad_norm": 0.2652340829372406, |
|
"learning_rate": 6.22159213181533e-05, |
|
"loss": 0.9831, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 0.6622058144900784, |
|
"grad_norm": 0.2596532702445984, |
|
"learning_rate": 6.18433174857705e-05, |
|
"loss": 0.9695, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.6633594831564374, |
|
"grad_norm": 0.2508351504802704, |
|
"learning_rate": 6.147133263900995e-05, |
|
"loss": 0.9275, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.6645131518227965, |
|
"grad_norm": 0.26645687222480774, |
|
"learning_rate": 6.1099972812287e-05, |
|
"loss": 0.9584, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.6656668204891555, |
|
"grad_norm": 0.3169649541378021, |
|
"learning_rate": 6.072924402987785e-05, |
|
"loss": 0.9506, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 0.6668204891555145, |
|
"grad_norm": 0.28282660245895386, |
|
"learning_rate": 6.0359152305821766e-05, |
|
"loss": 0.9557, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.6679741578218735, |
|
"grad_norm": 0.26345473527908325, |
|
"learning_rate": 5.99897036438235e-05, |
|
"loss": 0.9736, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 0.6691278264882325, |
|
"grad_norm": 0.2629302442073822, |
|
"learning_rate": 5.962090403715592e-05, |
|
"loss": 0.9299, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.6702814951545916, |
|
"grad_norm": 0.2775762677192688, |
|
"learning_rate": 5.925275946856275e-05, |
|
"loss": 0.948, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 0.6714351638209506, |
|
"grad_norm": 0.2726392149925232, |
|
"learning_rate": 5.8885275910161576e-05, |
|
"loss": 0.9188, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.6725888324873096, |
|
"grad_norm": 0.26594123244285583, |
|
"learning_rate": 5.8518459323346974e-05, |
|
"loss": 0.9756, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 0.6737425011536686, |
|
"grad_norm": 0.2796771824359894, |
|
"learning_rate": 5.8152315658693765e-05, |
|
"loss": 0.9523, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.6748961698200276, |
|
"grad_norm": 0.2691529095172882, |
|
"learning_rate": 5.7786850855860376e-05, |
|
"loss": 0.9648, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 0.6760498384863867, |
|
"grad_norm": 0.25442981719970703, |
|
"learning_rate": 5.7422070843492734e-05, |
|
"loss": 0.9237, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.6772035071527457, |
|
"grad_norm": 0.25092652440071106, |
|
"learning_rate": 5.7057981539127936e-05, |
|
"loss": 0.9545, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 0.6783571758191047, |
|
"grad_norm": 0.24751439690589905, |
|
"learning_rate": 5.6694588849098154e-05, |
|
"loss": 0.9359, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.6795108444854637, |
|
"grad_norm": 0.25949132442474365, |
|
"learning_rate": 5.633189866843507e-05, |
|
"loss": 0.9119, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 0.6806645131518227, |
|
"grad_norm": 0.2540017366409302, |
|
"learning_rate": 5.596991688077409e-05, |
|
"loss": 0.938, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.6818181818181818, |
|
"grad_norm": 0.2602344751358032, |
|
"learning_rate": 5.560864935825882e-05, |
|
"loss": 0.9278, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 0.6829718504845408, |
|
"grad_norm": 0.2632952332496643, |
|
"learning_rate": 5.5248101961446065e-05, |
|
"loss": 0.9745, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.6841255191508998, |
|
"grad_norm": 0.25782305002212524, |
|
"learning_rate": 5.4888280539210433e-05, |
|
"loss": 0.9515, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 0.6852791878172588, |
|
"grad_norm": 0.25692427158355713, |
|
"learning_rate": 5.4529190928649754e-05, |
|
"loss": 0.956, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.686432856483618, |
|
"grad_norm": 0.2693958282470703, |
|
"learning_rate": 5.417083895499024e-05, |
|
"loss": 0.9965, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 0.687586525149977, |
|
"grad_norm": 0.26253119111061096, |
|
"learning_rate": 5.381323043149191e-05, |
|
"loss": 0.998, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.688740193816336, |
|
"grad_norm": 0.25917479395866394, |
|
"learning_rate": 5.345637115935451e-05, |
|
"loss": 0.9278, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 0.689893862482695, |
|
"grad_norm": 0.25027045607566833, |
|
"learning_rate": 5.3100266927623156e-05, |
|
"loss": 0.9786, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.691047531149054, |
|
"grad_norm": 0.25722038745880127, |
|
"learning_rate": 5.274492351309461e-05, |
|
"loss": 0.9346, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 0.6922011998154131, |
|
"grad_norm": 0.2600374221801758, |
|
"learning_rate": 5.2390346680223535e-05, |
|
"loss": 0.9671, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.6933548684817721, |
|
"grad_norm": 0.26606494188308716, |
|
"learning_rate": 5.20365421810288e-05, |
|
"loss": 0.9662, |
|
"step": 3005 |
|
}, |
|
{ |
|
"epoch": 0.6945085371481311, |
|
"grad_norm": 0.24954178929328918, |
|
"learning_rate": 5.168351575500049e-05, |
|
"loss": 0.9459, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.6956622058144901, |
|
"grad_norm": 0.2654220759868622, |
|
"learning_rate": 5.133127312900652e-05, |
|
"loss": 0.9687, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 0.6968158744808491, |
|
"grad_norm": 0.26062485575675964, |
|
"learning_rate": 5.097982001719993e-05, |
|
"loss": 0.9419, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.6979695431472082, |
|
"grad_norm": 0.2603178322315216, |
|
"learning_rate": 5.062916212092594e-05, |
|
"loss": 0.9446, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.6991232118135672, |
|
"grad_norm": 0.261737585067749, |
|
"learning_rate": 5.027930512862976e-05, |
|
"loss": 0.9457, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.7002768804799262, |
|
"grad_norm": 0.2532404959201813, |
|
"learning_rate": 4.993025471576417e-05, |
|
"loss": 0.9526, |
|
"step": 3035 |
|
}, |
|
{ |
|
"epoch": 0.7014305491462852, |
|
"grad_norm": 0.2621046006679535, |
|
"learning_rate": 4.958201654469731e-05, |
|
"loss": 0.947, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.7025842178126442, |
|
"grad_norm": 0.28589344024658203, |
|
"learning_rate": 4.9234596264621136e-05, |
|
"loss": 0.9693, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 0.7037378864790033, |
|
"grad_norm": 0.2677820324897766, |
|
"learning_rate": 4.888799951145948e-05, |
|
"loss": 0.959, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.7048915551453623, |
|
"grad_norm": 0.26197487115859985, |
|
"learning_rate": 4.854223190777681e-05, |
|
"loss": 0.9605, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 0.7060452238117213, |
|
"grad_norm": 0.26514580845832825, |
|
"learning_rate": 4.8197299062686995e-05, |
|
"loss": 0.9486, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.7071988924780803, |
|
"grad_norm": 0.25191399455070496, |
|
"learning_rate": 4.785320657176217e-05, |
|
"loss": 0.944, |
|
"step": 3065 |
|
}, |
|
{ |
|
"epoch": 0.7083525611444393, |
|
"grad_norm": 0.2851702570915222, |
|
"learning_rate": 4.7509960016942144e-05, |
|
"loss": 0.954, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.7095062298107984, |
|
"grad_norm": 0.27994558215141296, |
|
"learning_rate": 4.716756496644381e-05, |
|
"loss": 0.9459, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 0.7106598984771574, |
|
"grad_norm": 0.2702610492706299, |
|
"learning_rate": 4.682602697467067e-05, |
|
"loss": 0.9566, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.7118135671435164, |
|
"grad_norm": 0.2700541019439697, |
|
"learning_rate": 4.648535158212296e-05, |
|
"loss": 0.9475, |
|
"step": 3085 |
|
}, |
|
{ |
|
"epoch": 0.7129672358098754, |
|
"grad_norm": 0.25302648544311523, |
|
"learning_rate": 4.6145544315307534e-05, |
|
"loss": 0.9379, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.7141209044762344, |
|
"grad_norm": 0.25596538186073303, |
|
"learning_rate": 4.5806610686648435e-05, |
|
"loss": 0.9658, |
|
"step": 3095 |
|
}, |
|
{ |
|
"epoch": 0.7152745731425935, |
|
"grad_norm": 0.25866860151290894, |
|
"learning_rate": 4.546855619439734e-05, |
|
"loss": 0.9469, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.7164282418089525, |
|
"grad_norm": 0.26797473430633545, |
|
"learning_rate": 4.513138632254432e-05, |
|
"loss": 0.9394, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 0.7175819104753115, |
|
"grad_norm": 0.2578045129776001, |
|
"learning_rate": 4.479510654072909e-05, |
|
"loss": 0.9381, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.7187355791416705, |
|
"grad_norm": 0.2649901211261749, |
|
"learning_rate": 4.4459722304151965e-05, |
|
"loss": 0.9537, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 0.7198892478080295, |
|
"grad_norm": 0.2583473324775696, |
|
"learning_rate": 4.412523905348568e-05, |
|
"loss": 0.9785, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.7210429164743886, |
|
"grad_norm": 0.2813000977039337, |
|
"learning_rate": 4.379166221478697e-05, |
|
"loss": 0.9683, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.7221965851407476, |
|
"grad_norm": 0.26045987010002136, |
|
"learning_rate": 4.345899719940843e-05, |
|
"loss": 0.9314, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.7233502538071066, |
|
"grad_norm": 0.25337517261505127, |
|
"learning_rate": 4.312724940391114e-05, |
|
"loss": 0.9132, |
|
"step": 3135 |
|
}, |
|
{ |
|
"epoch": 0.7245039224734656, |
|
"grad_norm": 0.2751719355583191, |
|
"learning_rate": 4.279642420997655e-05, |
|
"loss": 0.9446, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.7256575911398246, |
|
"grad_norm": 0.2659566402435303, |
|
"learning_rate": 4.246652698431969e-05, |
|
"loss": 0.9171, |
|
"step": 3145 |
|
}, |
|
{ |
|
"epoch": 0.7268112598061837, |
|
"grad_norm": 0.26259174942970276, |
|
"learning_rate": 4.213756307860175e-05, |
|
"loss": 0.9713, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.7279649284725427, |
|
"grad_norm": 0.2656116187572479, |
|
"learning_rate": 4.180953782934351e-05, |
|
"loss": 0.9383, |
|
"step": 3155 |
|
}, |
|
{ |
|
"epoch": 0.7291185971389017, |
|
"grad_norm": 0.26042497158050537, |
|
"learning_rate": 4.148245655783869e-05, |
|
"loss": 0.9537, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.7302722658052607, |
|
"grad_norm": 0.2528819739818573, |
|
"learning_rate": 4.115632457006746e-05, |
|
"loss": 0.9687, |
|
"step": 3165 |
|
}, |
|
{ |
|
"epoch": 0.7314259344716197, |
|
"grad_norm": 0.2507370412349701, |
|
"learning_rate": 4.0831147156610684e-05, |
|
"loss": 0.9856, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.7325796031379788, |
|
"grad_norm": 0.2561616003513336, |
|
"learning_rate": 4.050692959256377e-05, |
|
"loss": 0.9103, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 0.7337332718043378, |
|
"grad_norm": 0.25249841809272766, |
|
"learning_rate": 4.018367713745137e-05, |
|
"loss": 0.9277, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.7348869404706968, |
|
"grad_norm": 0.25682583451271057, |
|
"learning_rate": 3.986139503514194e-05, |
|
"loss": 0.9787, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 0.7360406091370558, |
|
"grad_norm": 0.2522560656070709, |
|
"learning_rate": 3.954008851376252e-05, |
|
"loss": 0.9415, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.7371942778034148, |
|
"grad_norm": 0.259802907705307, |
|
"learning_rate": 3.9219762785614246e-05, |
|
"loss": 0.9338, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 0.7383479464697739, |
|
"grad_norm": 0.2639108896255493, |
|
"learning_rate": 3.8900423047087585e-05, |
|
"loss": 0.9653, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.7395016151361329, |
|
"grad_norm": 0.27184954285621643, |
|
"learning_rate": 3.8582074478577966e-05, |
|
"loss": 0.9195, |
|
"step": 3205 |
|
}, |
|
{ |
|
"epoch": 0.7406552838024919, |
|
"grad_norm": 0.2584899961948395, |
|
"learning_rate": 3.826472224440202e-05, |
|
"loss": 0.9221, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.7418089524688509, |
|
"grad_norm": 0.25977569818496704, |
|
"learning_rate": 3.794837149271345e-05, |
|
"loss": 0.9724, |
|
"step": 3215 |
|
}, |
|
{ |
|
"epoch": 0.7429626211352099, |
|
"grad_norm": 0.2708073556423187, |
|
"learning_rate": 3.763302735541987e-05, |
|
"loss": 0.9661, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.744116289801569, |
|
"grad_norm": 0.2669990360736847, |
|
"learning_rate": 3.731869494809934e-05, |
|
"loss": 0.9382, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 0.745269958467928, |
|
"grad_norm": 0.2620033621788025, |
|
"learning_rate": 3.7005379369917325e-05, |
|
"loss": 0.9837, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.746423627134287, |
|
"grad_norm": 0.25972065329551697, |
|
"learning_rate": 3.6693085703544226e-05, |
|
"loss": 0.9258, |
|
"step": 3235 |
|
}, |
|
{ |
|
"epoch": 0.747577295800646, |
|
"grad_norm": 0.26290494203567505, |
|
"learning_rate": 3.638181901507265e-05, |
|
"loss": 0.952, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.748730964467005, |
|
"grad_norm": 0.2657393217086792, |
|
"learning_rate": 3.607158435393544e-05, |
|
"loss": 0.9421, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 0.7498846331333641, |
|
"grad_norm": 0.2849307954311371, |
|
"learning_rate": 3.576238675282364e-05, |
|
"loss": 0.9276, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.7510383017997231, |
|
"grad_norm": 0.2654859721660614, |
|
"learning_rate": 3.545423122760493e-05, |
|
"loss": 0.9299, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 0.7521919704660821, |
|
"grad_norm": 0.2530565559864044, |
|
"learning_rate": 3.5147122777242204e-05, |
|
"loss": 0.9545, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.7533456391324411, |
|
"grad_norm": 0.26191383600234985, |
|
"learning_rate": 3.4841066383712404e-05, |
|
"loss": 0.952, |
|
"step": 3265 |
|
}, |
|
{ |
|
"epoch": 0.7544993077988001, |
|
"grad_norm": 0.25318285822868347, |
|
"learning_rate": 3.4536067011925945e-05, |
|
"loss": 0.9139, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.7556529764651592, |
|
"grad_norm": 0.2692861557006836, |
|
"learning_rate": 3.423212960964586e-05, |
|
"loss": 0.9692, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 0.7568066451315182, |
|
"grad_norm": 0.27093103528022766, |
|
"learning_rate": 3.3929259107407784e-05, |
|
"loss": 0.9167, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.7579603137978772, |
|
"grad_norm": 0.25663748383522034, |
|
"learning_rate": 3.362746041843991e-05, |
|
"loss": 0.9418, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 0.7591139824642362, |
|
"grad_norm": 0.2527766823768616, |
|
"learning_rate": 3.3326738438583114e-05, |
|
"loss": 0.9566, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.7602676511305952, |
|
"grad_norm": 0.25249361991882324, |
|
"learning_rate": 3.302709804621184e-05, |
|
"loss": 0.9745, |
|
"step": 3295 |
|
}, |
|
{ |
|
"epoch": 0.7614213197969543, |
|
"grad_norm": 0.2630962133407593, |
|
"learning_rate": 3.272854410215467e-05, |
|
"loss": 0.944, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.7625749884633133, |
|
"grad_norm": 0.2653927206993103, |
|
"learning_rate": 3.243108144961563e-05, |
|
"loss": 0.9386, |
|
"step": 3305 |
|
}, |
|
{ |
|
"epoch": 0.7637286571296723, |
|
"grad_norm": 0.25982698798179626, |
|
"learning_rate": 3.213471491409568e-05, |
|
"loss": 0.9665, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.7648823257960313, |
|
"grad_norm": 0.24975992739200592, |
|
"learning_rate": 3.183944930331415e-05, |
|
"loss": 0.9397, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 0.7660359944623903, |
|
"grad_norm": 0.26783668994903564, |
|
"learning_rate": 3.154528940713113e-05, |
|
"loss": 0.9349, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.7671896631287495, |
|
"grad_norm": 0.26724961400032043, |
|
"learning_rate": 3.1252239997469514e-05, |
|
"loss": 0.9726, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 0.7683433317951085, |
|
"grad_norm": 0.25978267192840576, |
|
"learning_rate": 3.096030582823757e-05, |
|
"loss": 0.963, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.7694970004614675, |
|
"grad_norm": 0.2639058530330658, |
|
"learning_rate": 3.066949163525205e-05, |
|
"loss": 0.9452, |
|
"step": 3335 |
|
}, |
|
{ |
|
"epoch": 0.7706506691278265, |
|
"grad_norm": 0.25383907556533813, |
|
"learning_rate": 3.0379802136161074e-05, |
|
"loss": 0.9211, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.7718043377941856, |
|
"grad_norm": 0.253352552652359, |
|
"learning_rate": 3.009124203036785e-05, |
|
"loss": 0.9334, |
|
"step": 3345 |
|
}, |
|
{ |
|
"epoch": 0.7729580064605446, |
|
"grad_norm": 0.25482332706451416, |
|
"learning_rate": 2.9803815998954332e-05, |
|
"loss": 0.9433, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.7741116751269036, |
|
"grad_norm": 0.2710963189601898, |
|
"learning_rate": 2.951752870460519e-05, |
|
"loss": 0.9409, |
|
"step": 3355 |
|
}, |
|
{ |
|
"epoch": 0.7752653437932626, |
|
"grad_norm": 0.2619101107120514, |
|
"learning_rate": 2.9232384791532375e-05, |
|
"loss": 0.971, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.7764190124596216, |
|
"grad_norm": 0.27144715189933777, |
|
"learning_rate": 2.8948388885399568e-05, |
|
"loss": 0.9384, |
|
"step": 3365 |
|
}, |
|
{ |
|
"epoch": 0.7775726811259807, |
|
"grad_norm": 0.25738927721977234, |
|
"learning_rate": 2.8665545593247312e-05, |
|
"loss": 0.936, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.7787263497923397, |
|
"grad_norm": 0.2678247094154358, |
|
"learning_rate": 2.838385950341821e-05, |
|
"loss": 0.9503, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 0.7798800184586987, |
|
"grad_norm": 0.25635334849357605, |
|
"learning_rate": 2.810333518548246e-05, |
|
"loss": 0.9458, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.7810336871250577, |
|
"grad_norm": 0.26368802785873413, |
|
"learning_rate": 2.7823977190163786e-05, |
|
"loss": 0.952, |
|
"step": 3385 |
|
}, |
|
{ |
|
"epoch": 0.7821873557914167, |
|
"grad_norm": 0.25423574447631836, |
|
"learning_rate": 2.754579004926551e-05, |
|
"loss": 0.9813, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.7833410244577758, |
|
"grad_norm": 0.2504977285861969, |
|
"learning_rate": 2.7268778275597218e-05, |
|
"loss": 0.9251, |
|
"step": 3395 |
|
}, |
|
{ |
|
"epoch": 0.7844946931241348, |
|
"grad_norm": 0.26885226368904114, |
|
"learning_rate": 2.699294636290134e-05, |
|
"loss": 0.9533, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.7856483617904938, |
|
"grad_norm": 0.2703065276145935, |
|
"learning_rate": 2.6718298785780426e-05, |
|
"loss": 0.9293, |
|
"step": 3405 |
|
}, |
|
{ |
|
"epoch": 0.7868020304568528, |
|
"grad_norm": 0.25872454047203064, |
|
"learning_rate": 2.6444839999624494e-05, |
|
"loss": 0.9885, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.7879556991232118, |
|
"grad_norm": 0.2541477680206299, |
|
"learning_rate": 2.6172574440538678e-05, |
|
"loss": 0.9365, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 0.7891093677895709, |
|
"grad_norm": 0.26085081696510315, |
|
"learning_rate": 2.5901506525271425e-05, |
|
"loss": 0.9615, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.7902630364559299, |
|
"grad_norm": 0.2733447253704071, |
|
"learning_rate": 2.5631640651142653e-05, |
|
"loss": 0.955, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 0.7914167051222889, |
|
"grad_norm": 0.26850977540016174, |
|
"learning_rate": 2.5362981195972625e-05, |
|
"loss": 0.9904, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.7925703737886479, |
|
"grad_norm": 0.25592347979545593, |
|
"learning_rate": 2.509553251801078e-05, |
|
"loss": 0.9888, |
|
"step": 3435 |
|
}, |
|
{ |
|
"epoch": 0.7937240424550069, |
|
"grad_norm": 0.25363269448280334, |
|
"learning_rate": 2.482929895586502e-05, |
|
"loss": 0.922, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.794877711121366, |
|
"grad_norm": 0.26529207825660706, |
|
"learning_rate": 2.4564284828431495e-05, |
|
"loss": 0.9167, |
|
"step": 3445 |
|
}, |
|
{ |
|
"epoch": 0.796031379787725, |
|
"grad_norm": 0.264886736869812, |
|
"learning_rate": 2.4300494434824373e-05, |
|
"loss": 0.9587, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.797185048454084, |
|
"grad_norm": 0.26478299498558044, |
|
"learning_rate": 2.403793205430612e-05, |
|
"loss": 0.9532, |
|
"step": 3455 |
|
}, |
|
{ |
|
"epoch": 0.798338717120443, |
|
"grad_norm": 0.26431044936180115, |
|
"learning_rate": 2.3776601946218223e-05, |
|
"loss": 0.9533, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.799492385786802, |
|
"grad_norm": 0.2664218246936798, |
|
"learning_rate": 2.351650834991187e-05, |
|
"loss": 0.9451, |
|
"step": 3465 |
|
}, |
|
{ |
|
"epoch": 0.8006460544531611, |
|
"grad_norm": 0.25591370463371277, |
|
"learning_rate": 2.3257655484679374e-05, |
|
"loss": 0.9164, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.8017997231195201, |
|
"grad_norm": 0.2605327069759369, |
|
"learning_rate": 2.300004754968568e-05, |
|
"loss": 0.9632, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 0.8029533917858791, |
|
"grad_norm": 0.26043200492858887, |
|
"learning_rate": 2.274368872390009e-05, |
|
"loss": 0.9781, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.8041070604522381, |
|
"grad_norm": 0.256830632686615, |
|
"learning_rate": 2.2488583166028754e-05, |
|
"loss": 0.9832, |
|
"step": 3485 |
|
}, |
|
{ |
|
"epoch": 0.8052607291185971, |
|
"grad_norm": 0.2615990936756134, |
|
"learning_rate": 2.2234735014446907e-05, |
|
"loss": 0.9165, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.8064143977849562, |
|
"grad_norm": 0.2640646994113922, |
|
"learning_rate": 2.1982148387131972e-05, |
|
"loss": 0.9642, |
|
"step": 3495 |
|
}, |
|
{ |
|
"epoch": 0.8075680664513152, |
|
"grad_norm": 0.28312572836875916, |
|
"learning_rate": 2.1730827381596643e-05, |
|
"loss": 0.9532, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.8087217351176742, |
|
"grad_norm": 0.2744578421115875, |
|
"learning_rate": 2.148077607482242e-05, |
|
"loss": 0.9206, |
|
"step": 3505 |
|
}, |
|
{ |
|
"epoch": 0.8098754037840332, |
|
"grad_norm": 0.266108900308609, |
|
"learning_rate": 2.123199852319352e-05, |
|
"loss": 0.955, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.8110290724503922, |
|
"grad_norm": 0.26437053084373474, |
|
"learning_rate": 2.098449876243096e-05, |
|
"loss": 0.9314, |
|
"step": 3515 |
|
}, |
|
{ |
|
"epoch": 0.8121827411167513, |
|
"grad_norm": 0.2548779547214508, |
|
"learning_rate": 2.0738280807527276e-05, |
|
"loss": 0.9231, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.8133364097831103, |
|
"grad_norm": 0.2563740015029907, |
|
"learning_rate": 2.0493348652681187e-05, |
|
"loss": 0.9955, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 0.8144900784494693, |
|
"grad_norm": 0.25444361567497253, |
|
"learning_rate": 2.024970627123295e-05, |
|
"loss": 0.9399, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.8156437471158283, |
|
"grad_norm": 0.2691180109977722, |
|
"learning_rate": 2.0007357615599863e-05, |
|
"loss": 0.937, |
|
"step": 3535 |
|
}, |
|
{ |
|
"epoch": 0.8167974157821873, |
|
"grad_norm": 0.2676614224910736, |
|
"learning_rate": 1.9766306617212072e-05, |
|
"loss": 0.9454, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.8179510844485464, |
|
"grad_norm": 0.2619343101978302, |
|
"learning_rate": 1.9526557186448922e-05, |
|
"loss": 0.9622, |
|
"step": 3545 |
|
}, |
|
{ |
|
"epoch": 0.8191047531149054, |
|
"grad_norm": 0.25831881165504456, |
|
"learning_rate": 1.9288113212575452e-05, |
|
"loss": 0.9554, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.8202584217812644, |
|
"grad_norm": 0.27389928698539734, |
|
"learning_rate": 1.9050978563679245e-05, |
|
"loss": 0.9845, |
|
"step": 3555 |
|
}, |
|
{ |
|
"epoch": 0.8214120904476234, |
|
"grad_norm": 0.2780735194683075, |
|
"learning_rate": 1.8815157086607826e-05, |
|
"loss": 0.9868, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.8225657591139824, |
|
"grad_norm": 0.26343536376953125, |
|
"learning_rate": 1.858065260690609e-05, |
|
"loss": 0.9432, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 0.8237194277803415, |
|
"grad_norm": 0.2577558159828186, |
|
"learning_rate": 1.8347468928754407e-05, |
|
"loss": 0.9374, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.8248730964467005, |
|
"grad_norm": 0.2493932992219925, |
|
"learning_rate": 1.811560983490682e-05, |
|
"loss": 0.9687, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 0.8260267651130595, |
|
"grad_norm": 0.2670118510723114, |
|
"learning_rate": 1.78850790866296e-05, |
|
"loss": 0.981, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.8271804337794185, |
|
"grad_norm": 0.2631658613681793, |
|
"learning_rate": 1.765588042364045e-05, |
|
"loss": 0.9775, |
|
"step": 3585 |
|
}, |
|
{ |
|
"epoch": 0.8283341024457775, |
|
"grad_norm": 0.2607511878013611, |
|
"learning_rate": 1.7428017564047594e-05, |
|
"loss": 0.9409, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.8294877711121366, |
|
"grad_norm": 0.283981591463089, |
|
"learning_rate": 1.7201494204289647e-05, |
|
"loss": 1.015, |
|
"step": 3595 |
|
}, |
|
{ |
|
"epoch": 0.8306414397784956, |
|
"grad_norm": 0.2698068916797638, |
|
"learning_rate": 1.697631401907559e-05, |
|
"loss": 0.9332, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.8317951084448546, |
|
"grad_norm": 0.2729108929634094, |
|
"learning_rate": 1.6752480661325078e-05, |
|
"loss": 0.9455, |
|
"step": 3605 |
|
}, |
|
{ |
|
"epoch": 0.8329487771112136, |
|
"grad_norm": 0.2679850459098816, |
|
"learning_rate": 1.6529997762109317e-05, |
|
"loss": 0.9306, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.8341024457775726, |
|
"grad_norm": 0.2747521698474884, |
|
"learning_rate": 1.6308868930592014e-05, |
|
"loss": 0.9063, |
|
"step": 3615 |
|
}, |
|
{ |
|
"epoch": 0.8352561144439317, |
|
"grad_norm": 0.26654738187789917, |
|
"learning_rate": 1.608909775397106e-05, |
|
"loss": 0.9431, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.8364097831102907, |
|
"grad_norm": 0.2640974521636963, |
|
"learning_rate": 1.5870687797420013e-05, |
|
"loss": 0.9476, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 0.8375634517766497, |
|
"grad_norm": 0.259022057056427, |
|
"learning_rate": 1.565364260403055e-05, |
|
"loss": 0.9037, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.8387171204430087, |
|
"grad_norm": 0.2541236877441406, |
|
"learning_rate": 1.5437965694754842e-05, |
|
"loss": 0.9287, |
|
"step": 3635 |
|
}, |
|
{ |
|
"epoch": 0.8398707891093677, |
|
"grad_norm": 0.27789777517318726, |
|
"learning_rate": 1.5223660568348442e-05, |
|
"loss": 0.9304, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.8410244577757268, |
|
"grad_norm": 0.259775847196579, |
|
"learning_rate": 1.5010730701313625e-05, |
|
"loss": 0.9337, |
|
"step": 3645 |
|
}, |
|
{ |
|
"epoch": 0.8421781264420858, |
|
"grad_norm": 0.26516568660736084, |
|
"learning_rate": 1.4799179547842822e-05, |
|
"loss": 0.9638, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.8433317951084448, |
|
"grad_norm": 0.26108843088150024, |
|
"learning_rate": 1.4589010539762794e-05, |
|
"loss": 0.9627, |
|
"step": 3655 |
|
}, |
|
{ |
|
"epoch": 0.8444854637748038, |
|
"grad_norm": 0.2728930711746216, |
|
"learning_rate": 1.4380227086478815e-05, |
|
"loss": 0.9148, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.8456391324411628, |
|
"grad_norm": 0.26303428411483765, |
|
"learning_rate": 1.417283257491936e-05, |
|
"loss": 0.9355, |
|
"step": 3665 |
|
}, |
|
{ |
|
"epoch": 0.846792801107522, |
|
"grad_norm": 0.25789615511894226, |
|
"learning_rate": 1.3966830369481232e-05, |
|
"loss": 0.9213, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.847946469773881, |
|
"grad_norm": 0.25677064061164856, |
|
"learning_rate": 1.3762223811975005e-05, |
|
"loss": 0.9607, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.84910013844024, |
|
"grad_norm": 0.25708821415901184, |
|
"learning_rate": 1.3559016221570663e-05, |
|
"loss": 0.9251, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.850253807106599, |
|
"grad_norm": 0.26253747940063477, |
|
"learning_rate": 1.3357210894743954e-05, |
|
"loss": 0.9342, |
|
"step": 3685 |
|
}, |
|
{ |
|
"epoch": 0.8514074757729581, |
|
"grad_norm": 0.2784782946109772, |
|
"learning_rate": 1.3156811105222721e-05, |
|
"loss": 0.924, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.8525611444393171, |
|
"grad_norm": 0.2603437304496765, |
|
"learning_rate": 1.295782010393396e-05, |
|
"loss": 0.9589, |
|
"step": 3695 |
|
}, |
|
{ |
|
"epoch": 0.8537148131056761, |
|
"grad_norm": 0.2674865126609802, |
|
"learning_rate": 1.2760241118951011e-05, |
|
"loss": 0.9642, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.8548684817720351, |
|
"grad_norm": 0.28415897488594055, |
|
"learning_rate": 1.256407735544114e-05, |
|
"loss": 0.9345, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 0.8560221504383941, |
|
"grad_norm": 0.2664172947406769, |
|
"learning_rate": 1.2369331995613665e-05, |
|
"loss": 0.9703, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.8571758191047532, |
|
"grad_norm": 0.2628554403781891, |
|
"learning_rate": 1.2176008198668164e-05, |
|
"loss": 0.953, |
|
"step": 3715 |
|
}, |
|
{ |
|
"epoch": 0.8583294877711122, |
|
"grad_norm": 0.26892125606536865, |
|
"learning_rate": 1.1984109100743446e-05, |
|
"loss": 0.964, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.8594831564374712, |
|
"grad_norm": 0.2509150803089142, |
|
"learning_rate": 1.179363781486651e-05, |
|
"loss": 0.9529, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 0.8606368251038302, |
|
"grad_norm": 0.263091504573822, |
|
"learning_rate": 1.160459743090203e-05, |
|
"loss": 0.9439, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.8617904937701892, |
|
"grad_norm": 0.26646509766578674, |
|
"learning_rate": 1.1416991015502388e-05, |
|
"loss": 0.9699, |
|
"step": 3735 |
|
}, |
|
{ |
|
"epoch": 0.8629441624365483, |
|
"grad_norm": 0.26804062724113464, |
|
"learning_rate": 1.1230821612057751e-05, |
|
"loss": 0.9448, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.8640978311029073, |
|
"grad_norm": 0.2859780192375183, |
|
"learning_rate": 1.1046092240646865e-05, |
|
"loss": 0.939, |
|
"step": 3745 |
|
}, |
|
{ |
|
"epoch": 0.8652514997692663, |
|
"grad_norm": 0.257179856300354, |
|
"learning_rate": 1.0862805897987894e-05, |
|
"loss": 0.9542, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.8664051684356253, |
|
"grad_norm": 0.26742085814476013, |
|
"learning_rate": 1.0680965557389932e-05, |
|
"loss": 0.9531, |
|
"step": 3755 |
|
}, |
|
{ |
|
"epoch": 0.8675588371019843, |
|
"grad_norm": 0.2629797160625458, |
|
"learning_rate": 1.0500574168704746e-05, |
|
"loss": 0.9647, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.8687125057683434, |
|
"grad_norm": 0.2643450200557709, |
|
"learning_rate": 1.0321634658278834e-05, |
|
"loss": 0.9364, |
|
"step": 3765 |
|
}, |
|
{ |
|
"epoch": 0.8698661744347024, |
|
"grad_norm": 0.2526227831840515, |
|
"learning_rate": 1.014414992890611e-05, |
|
"loss": 0.9297, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.8710198431010614, |
|
"grad_norm": 0.2794305682182312, |
|
"learning_rate": 9.968122859780648e-06, |
|
"loss": 0.983, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 0.8721735117674204, |
|
"grad_norm": 0.2461376041173935, |
|
"learning_rate": 9.793556306450125e-06, |
|
"loss": 0.9312, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.8733271804337794, |
|
"grad_norm": 0.2625856101512909, |
|
"learning_rate": 9.6204531007694e-06, |
|
"loss": 0.9691, |
|
"step": 3785 |
|
}, |
|
{ |
|
"epoch": 0.8744808491001385, |
|
"grad_norm": 0.2845028340816498, |
|
"learning_rate": 9.44881605085456e-06, |
|
"loss": 0.9782, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.8756345177664975, |
|
"grad_norm": 0.258274644613266, |
|
"learning_rate": 9.278647941037477e-06, |
|
"loss": 0.9503, |
|
"step": 3795 |
|
}, |
|
{ |
|
"epoch": 0.8767881864328565, |
|
"grad_norm": 0.2558533847332001, |
|
"learning_rate": 9.10995153182056e-06, |
|
"loss": 0.9396, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.8779418550992155, |
|
"grad_norm": 0.2619149684906006, |
|
"learning_rate": 8.94272955983192e-06, |
|
"loss": 0.9637, |
|
"step": 3805 |
|
}, |
|
{ |
|
"epoch": 0.8790955237655745, |
|
"grad_norm": 0.27039748430252075, |
|
"learning_rate": 8.776984737781135e-06, |
|
"loss": 0.9782, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.8802491924319336, |
|
"grad_norm": 0.26006579399108887, |
|
"learning_rate": 8.612719754415077e-06, |
|
"loss": 0.9673, |
|
"step": 3815 |
|
}, |
|
{ |
|
"epoch": 0.8814028610982926, |
|
"grad_norm": 0.2625749111175537, |
|
"learning_rate": 8.449937274474396e-06, |
|
"loss": 0.9224, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.8825565297646516, |
|
"grad_norm": 0.26278451085090637, |
|
"learning_rate": 8.28863993865029e-06, |
|
"loss": 0.9762, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 0.8837101984310106, |
|
"grad_norm": 0.25975650548934937, |
|
"learning_rate": 8.128830363541574e-06, |
|
"loss": 0.9697, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.8848638670973696, |
|
"grad_norm": 0.2637532949447632, |
|
"learning_rate": 7.970511141612392e-06, |
|
"loss": 0.9278, |
|
"step": 3835 |
|
}, |
|
{ |
|
"epoch": 0.8860175357637287, |
|
"grad_norm": 0.2644905149936676, |
|
"learning_rate": 7.81368484114996e-06, |
|
"loss": 0.9218, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.8871712044300877, |
|
"grad_norm": 0.25665807723999023, |
|
"learning_rate": 7.65835400622309e-06, |
|
"loss": 0.9872, |
|
"step": 3845 |
|
}, |
|
{ |
|
"epoch": 0.8883248730964467, |
|
"grad_norm": 0.26085856556892395, |
|
"learning_rate": 7.504521156640853e-06, |
|
"loss": 0.9236, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.8894785417628057, |
|
"grad_norm": 0.2720387578010559, |
|
"learning_rate": 7.352188787911618e-06, |
|
"loss": 0.9589, |
|
"step": 3855 |
|
}, |
|
{ |
|
"epoch": 0.8906322104291647, |
|
"grad_norm": 0.26417550444602966, |
|
"learning_rate": 7.201359371202699e-06, |
|
"loss": 0.9664, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.8917858790955238, |
|
"grad_norm": 0.2652938663959503, |
|
"learning_rate": 7.05203535330019e-06, |
|
"loss": 0.926, |
|
"step": 3865 |
|
}, |
|
{ |
|
"epoch": 0.8929395477618828, |
|
"grad_norm": 0.2653699517250061, |
|
"learning_rate": 6.904219156569325e-06, |
|
"loss": 0.9923, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.8940932164282418, |
|
"grad_norm": 0.2711726129055023, |
|
"learning_rate": 6.757913178915087e-06, |
|
"loss": 0.911, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 0.8952468850946008, |
|
"grad_norm": 0.2669640779495239, |
|
"learning_rate": 6.613119793743428e-06, |
|
"loss": 0.9675, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.8964005537609598, |
|
"grad_norm": 0.29151153564453125, |
|
"learning_rate": 6.469841349922689e-06, |
|
"loss": 0.9209, |
|
"step": 3885 |
|
}, |
|
{ |
|
"epoch": 0.8975542224273189, |
|
"grad_norm": 0.2601694166660309, |
|
"learning_rate": 6.32808017174551e-06, |
|
"loss": 0.9558, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.8987078910936779, |
|
"grad_norm": 0.26199638843536377, |
|
"learning_rate": 6.187838558891157e-06, |
|
"loss": 0.9613, |
|
"step": 3895 |
|
}, |
|
{ |
|
"epoch": 0.8998615597600369, |
|
"grad_norm": 0.2727982699871063, |
|
"learning_rate": 6.049118786388152e-06, |
|
"loss": 0.9568, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.9010152284263959, |
|
"grad_norm": 0.2732504606246948, |
|
"learning_rate": 5.911923104577455e-06, |
|
"loss": 0.9737, |
|
"step": 3905 |
|
}, |
|
{ |
|
"epoch": 0.9021688970927549, |
|
"grad_norm": 0.2629091441631317, |
|
"learning_rate": 5.7762537390758875e-06, |
|
"loss": 0.9901, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.903322565759114, |
|
"grad_norm": 0.2660592496395111, |
|
"learning_rate": 5.642112890740036e-06, |
|
"loss": 0.9522, |
|
"step": 3915 |
|
}, |
|
{ |
|
"epoch": 0.904476234425473, |
|
"grad_norm": 0.2575923502445221, |
|
"learning_rate": 5.509502735630601e-06, |
|
"loss": 0.9218, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.905629903091832, |
|
"grad_norm": 0.25905632972717285, |
|
"learning_rate": 5.3784254249770296e-06, |
|
"loss": 0.9516, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 0.906783571758191, |
|
"grad_norm": 0.29798364639282227, |
|
"learning_rate": 5.248883085142653e-06, |
|
"loss": 0.9691, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.90793724042455, |
|
"grad_norm": 0.2637086510658264, |
|
"learning_rate": 5.120877817590197e-06, |
|
"loss": 0.9274, |
|
"step": 3935 |
|
}, |
|
{ |
|
"epoch": 0.9090909090909091, |
|
"grad_norm": 0.25364989042282104, |
|
"learning_rate": 4.994411698847667e-06, |
|
"loss": 0.933, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.9102445777572681, |
|
"grad_norm": 0.24825413525104523, |
|
"learning_rate": 4.869486780474685e-06, |
|
"loss": 0.9442, |
|
"step": 3945 |
|
}, |
|
{ |
|
"epoch": 0.9113982464236271, |
|
"grad_norm": 0.25310808420181274, |
|
"learning_rate": 4.746105089029229e-06, |
|
"loss": 0.9469, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.9125519150899861, |
|
"grad_norm": 0.26140066981315613, |
|
"learning_rate": 4.624268626034667e-06, |
|
"loss": 0.9754, |
|
"step": 3955 |
|
}, |
|
{ |
|
"epoch": 0.9137055837563451, |
|
"grad_norm": 0.2598501443862915, |
|
"learning_rate": 4.50397936794742e-06, |
|
"loss": 0.9462, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.9148592524227042, |
|
"grad_norm": 0.25906556844711304, |
|
"learning_rate": 4.385239266124752e-06, |
|
"loss": 0.9194, |
|
"step": 3965 |
|
}, |
|
{ |
|
"epoch": 0.9160129210890632, |
|
"grad_norm": 0.26104477047920227, |
|
"learning_rate": 4.268050246793276e-06, |
|
"loss": 0.934, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.9171665897554222, |
|
"grad_norm": 0.2576802968978882, |
|
"learning_rate": 4.1524142110175745e-06, |
|
"loss": 0.9832, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 0.9183202584217812, |
|
"grad_norm": 0.28592053055763245, |
|
"learning_rate": 4.038333034669406e-06, |
|
"loss": 1.0201, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.9194739270881402, |
|
"grad_norm": 0.2563294470310211, |
|
"learning_rate": 3.9258085683972935e-06, |
|
"loss": 0.9904, |
|
"step": 3985 |
|
}, |
|
{ |
|
"epoch": 0.9206275957544993, |
|
"grad_norm": 0.2748868763446808, |
|
"learning_rate": 3.814842637596483e-06, |
|
"loss": 0.9739, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.9217812644208583, |
|
"grad_norm": 0.2553744316101074, |
|
"learning_rate": 3.705437042379334e-06, |
|
"loss": 1.0174, |
|
"step": 3995 |
|
}, |
|
{ |
|
"epoch": 0.9229349330872173, |
|
"grad_norm": 0.2620948553085327, |
|
"learning_rate": 3.5975935575461083e-06, |
|
"loss": 0.9456, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9240886017535763, |
|
"grad_norm": 0.2571701407432556, |
|
"learning_rate": 3.491313932556206e-06, |
|
"loss": 0.9339, |
|
"step": 4005 |
|
}, |
|
{ |
|
"epoch": 0.9252422704199353, |
|
"grad_norm": 0.26273953914642334, |
|
"learning_rate": 3.3865998914997643e-06, |
|
"loss": 0.9831, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.9263959390862944, |
|
"grad_norm": 0.25073423981666565, |
|
"learning_rate": 3.2834531330696694e-06, |
|
"loss": 0.9478, |
|
"step": 4015 |
|
}, |
|
{ |
|
"epoch": 0.9275496077526535, |
|
"grad_norm": 0.2635844051837921, |
|
"learning_rate": 3.1818753305340565e-06, |
|
"loss": 0.9356, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.9287032764190125, |
|
"grad_norm": 0.25664207339286804, |
|
"learning_rate": 3.081868131709109e-06, |
|
"loss": 0.9419, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 0.9298569450853715, |
|
"grad_norm": 0.2655963897705078, |
|
"learning_rate": 2.9834331589323693e-06, |
|
"loss": 0.9114, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.9310106137517306, |
|
"grad_norm": 0.2534726858139038, |
|
"learning_rate": 2.8865720090364034e-06, |
|
"loss": 0.9644, |
|
"step": 4035 |
|
}, |
|
{ |
|
"epoch": 0.9321642824180896, |
|
"grad_norm": 0.2585753798484802, |
|
"learning_rate": 2.7912862533228558e-06, |
|
"loss": 0.9495, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.9333179510844486, |
|
"grad_norm": 0.25545534491539, |
|
"learning_rate": 2.6975774375370464e-06, |
|
"loss": 0.9604, |
|
"step": 4045 |
|
}, |
|
{ |
|
"epoch": 0.9344716197508076, |
|
"grad_norm": 0.2509816884994507, |
|
"learning_rate": 2.6054470818428377e-06, |
|
"loss": 0.9603, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.9356252884171666, |
|
"grad_norm": 0.26575180888175964, |
|
"learning_rate": 2.5148966807979733e-06, |
|
"loss": 0.9527, |
|
"step": 4055 |
|
}, |
|
{ |
|
"epoch": 0.9367789570835257, |
|
"grad_norm": 0.25556278228759766, |
|
"learning_rate": 2.4259277033298555e-06, |
|
"loss": 0.9592, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.9379326257498847, |
|
"grad_norm": 0.26294147968292236, |
|
"learning_rate": 2.338541592711696e-06, |
|
"loss": 0.954, |
|
"step": 4065 |
|
}, |
|
{ |
|
"epoch": 0.9390862944162437, |
|
"grad_norm": 0.25935277342796326, |
|
"learning_rate": 2.2527397665391027e-06, |
|
"loss": 0.9491, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.9402399630826027, |
|
"grad_norm": 0.2684488594532013, |
|
"learning_rate": 2.168523616707141e-06, |
|
"loss": 0.9403, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 0.9413936317489617, |
|
"grad_norm": 0.26326921582221985, |
|
"learning_rate": 2.0858945093876316e-06, |
|
"loss": 0.9381, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.9425473004153208, |
|
"grad_norm": 0.2741606831550598, |
|
"learning_rate": 2.0048537850071325e-06, |
|
"loss": 0.9475, |
|
"step": 4085 |
|
}, |
|
{ |
|
"epoch": 0.9437009690816798, |
|
"grad_norm": 0.2589118480682373, |
|
"learning_rate": 1.925402758225059e-06, |
|
"loss": 0.981, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.9448546377480388, |
|
"grad_norm": 0.2977379262447357, |
|
"learning_rate": 1.8475427179124871e-06, |
|
"loss": 0.9404, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 0.9460083064143978, |
|
"grad_norm": 0.26037946343421936, |
|
"learning_rate": 1.771274927131139e-06, |
|
"loss": 0.9337, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.9471619750807568, |
|
"grad_norm": 0.25153833627700806, |
|
"learning_rate": 1.6966006231129317e-06, |
|
"loss": 0.9531, |
|
"step": 4105 |
|
}, |
|
{ |
|
"epoch": 0.9483156437471159, |
|
"grad_norm": 0.25663888454437256, |
|
"learning_rate": 1.6235210172399372e-06, |
|
"loss": 0.972, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.9494693124134749, |
|
"grad_norm": 0.24960479140281677, |
|
"learning_rate": 1.5520372950246887e-06, |
|
"loss": 0.9229, |
|
"step": 4115 |
|
}, |
|
{ |
|
"epoch": 0.9506229810798339, |
|
"grad_norm": 0.2711315453052521, |
|
"learning_rate": 1.4821506160909493e-06, |
|
"loss": 0.9771, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.9517766497461929, |
|
"grad_norm": 0.24772925674915314, |
|
"learning_rate": 1.4138621141549402e-06, |
|
"loss": 0.9495, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 0.9529303184125519, |
|
"grad_norm": 0.2522965967655182, |
|
"learning_rate": 1.3471728970068987e-06, |
|
"loss": 0.932, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.954083987078911, |
|
"grad_norm": 0.24729067087173462, |
|
"learning_rate": 1.2820840464931704e-06, |
|
"loss": 0.9023, |
|
"step": 4135 |
|
}, |
|
{ |
|
"epoch": 0.95523765574527, |
|
"grad_norm": 0.24839915335178375, |
|
"learning_rate": 1.2185966184985685e-06, |
|
"loss": 0.9072, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.956391324411629, |
|
"grad_norm": 0.2570428252220154, |
|
"learning_rate": 1.1567116429293423e-06, |
|
"loss": 0.9276, |
|
"step": 4145 |
|
}, |
|
{ |
|
"epoch": 0.957544993077988, |
|
"grad_norm": 0.26178133487701416, |
|
"learning_rate": 1.0964301236963904e-06, |
|
"loss": 0.9461, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.958698661744347, |
|
"grad_norm": 0.2655417323112488, |
|
"learning_rate": 1.0377530386990408e-06, |
|
"loss": 0.9586, |
|
"step": 4155 |
|
}, |
|
{ |
|
"epoch": 0.9598523304107061, |
|
"grad_norm": 0.2616187036037445, |
|
"learning_rate": 9.80681339809142e-07, |
|
"loss": 0.952, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.9610059990770651, |
|
"grad_norm": 0.2501654326915741, |
|
"learning_rate": 9.252159528556403e-07, |
|
"loss": 0.9187, |
|
"step": 4165 |
|
}, |
|
{ |
|
"epoch": 0.9621596677434241, |
|
"grad_norm": 0.261311411857605, |
|
"learning_rate": 8.713577776095494e-07, |
|
"loss": 0.9637, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.9633133364097831, |
|
"grad_norm": 0.26594147086143494, |
|
"learning_rate": 8.191076877693604e-07, |
|
"loss": 0.946, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 0.9644670050761421, |
|
"grad_norm": 0.25597435235977173, |
|
"learning_rate": 7.684665309468875e-07, |
|
"loss": 0.9229, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.9656206737425012, |
|
"grad_norm": 0.2549925446510315, |
|
"learning_rate": 7.194351286534784e-07, |
|
"loss": 0.966, |
|
"step": 4185 |
|
}, |
|
{ |
|
"epoch": 0.9667743424088602, |
|
"grad_norm": 0.24757057428359985, |
|
"learning_rate": 6.720142762867032e-07, |
|
"loss": 0.9443, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.9679280110752192, |
|
"grad_norm": 0.26646557450294495, |
|
"learning_rate": 6.262047431174866e-07, |
|
"loss": 0.9548, |
|
"step": 4195 |
|
}, |
|
{ |
|
"epoch": 0.9690816797415782, |
|
"grad_norm": 0.25552335381507874, |
|
"learning_rate": 5.820072722775849e-07, |
|
"loss": 0.941, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.9702353484079372, |
|
"grad_norm": 0.2553008496761322, |
|
"learning_rate": 5.394225807475284e-07, |
|
"loss": 0.9398, |
|
"step": 4205 |
|
}, |
|
{ |
|
"epoch": 0.9713890170742963, |
|
"grad_norm": 0.26585790514945984, |
|
"learning_rate": 4.984513593450424e-07, |
|
"loss": 0.9469, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.9725426857406553, |
|
"grad_norm": 0.25513309240341187, |
|
"learning_rate": 4.5909427271374485e-07, |
|
"loss": 0.9518, |
|
"step": 4215 |
|
}, |
|
{ |
|
"epoch": 0.9736963544070143, |
|
"grad_norm": 0.2652360796928406, |
|
"learning_rate": 4.2135195931249926e-07, |
|
"loss": 0.9489, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.9748500230733733, |
|
"grad_norm": 0.2571764588356018, |
|
"learning_rate": 3.8522503140493436e-07, |
|
"loss": 0.9149, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 0.9760036917397323, |
|
"grad_norm": 0.2566836178302765, |
|
"learning_rate": 3.50714075049563e-07, |
|
"loss": 0.9953, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.9771573604060914, |
|
"grad_norm": 0.290528804063797, |
|
"learning_rate": 3.178196500903008e-07, |
|
"loss": 0.9222, |
|
"step": 4235 |
|
}, |
|
{ |
|
"epoch": 0.9783110290724504, |
|
"grad_norm": 0.2592345178127289, |
|
"learning_rate": 2.8654229014730694e-07, |
|
"loss": 0.9206, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.9794646977388094, |
|
"grad_norm": 0.2652153968811035, |
|
"learning_rate": 2.568825026084354e-07, |
|
"loss": 0.9443, |
|
"step": 4245 |
|
}, |
|
{ |
|
"epoch": 0.9806183664051684, |
|
"grad_norm": 0.24910645186901093, |
|
"learning_rate": 2.288407686208971e-07, |
|
"loss": 0.9519, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.9817720350715274, |
|
"grad_norm": 0.25289344787597656, |
|
"learning_rate": 2.024175430835329e-07, |
|
"loss": 0.9381, |
|
"step": 4255 |
|
}, |
|
{ |
|
"epoch": 0.9829257037378865, |
|
"grad_norm": 0.25260549783706665, |
|
"learning_rate": 1.7761325463937494e-07, |
|
"loss": 0.9563, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.9840793724042455, |
|
"grad_norm": 0.2648310363292694, |
|
"learning_rate": 1.5442830566874123e-07, |
|
"loss": 0.9439, |
|
"step": 4265 |
|
}, |
|
{ |
|
"epoch": 0.9852330410706045, |
|
"grad_norm": 0.2567463517189026, |
|
"learning_rate": 1.3286307228269623e-07, |
|
"loss": 0.9289, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.9863867097369635, |
|
"grad_norm": 0.2639390826225281, |
|
"learning_rate": 1.1291790431692262e-07, |
|
"loss": 0.9529, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 0.9875403784033225, |
|
"grad_norm": 0.2619445323944092, |
|
"learning_rate": 9.459312532608122e-08, |
|
"loss": 0.9555, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.9886940470696816, |
|
"grad_norm": 0.2608962059020996, |
|
"learning_rate": 7.788903257852643e-08, |
|
"loss": 0.9886, |
|
"step": 4285 |
|
}, |
|
{ |
|
"epoch": 0.9898477157360406, |
|
"grad_norm": 0.24952927231788635, |
|
"learning_rate": 6.280589705153217e-08, |
|
"loss": 0.9446, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.9910013844023996, |
|
"grad_norm": 0.2512921988964081, |
|
"learning_rate": 4.934396342684e-08, |
|
"loss": 0.9613, |
|
"step": 4295 |
|
}, |
|
{ |
|
"epoch": 0.9921550530687586, |
|
"grad_norm": 0.2717246413230896, |
|
"learning_rate": 3.750345008675105e-08, |
|
"loss": 0.9526, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.9933087217351176, |
|
"grad_norm": 0.2628538906574249, |
|
"learning_rate": 2.728454911050671e-08, |
|
"loss": 0.9467, |
|
"step": 4305 |
|
}, |
|
{ |
|
"epoch": 0.9944623904014767, |
|
"grad_norm": 0.26591312885284424, |
|
"learning_rate": 1.8687426271246645e-08, |
|
"loss": 0.9604, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.9956160590678357, |
|
"grad_norm": 0.26221415400505066, |
|
"learning_rate": 1.1712221033288728e-08, |
|
"loss": 0.9664, |
|
"step": 4315 |
|
}, |
|
{ |
|
"epoch": 0.9967697277341947, |
|
"grad_norm": 0.2693996727466583, |
|
"learning_rate": 6.359046549864189e-09, |
|
"loss": 0.966, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.9979233964005537, |
|
"grad_norm": 0.26124852895736694, |
|
"learning_rate": 2.627989661252439e-09, |
|
"loss": 0.9511, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 0.9990770650669127, |
|
"grad_norm": 0.2654908299446106, |
|
"learning_rate": 5.191108934710087e-10, |
|
"loss": 0.9212, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.9544818997383118, |
|
"eval_runtime": 1126.36, |
|
"eval_samples_per_second": 13.628, |
|
"eval_steps_per_second": 0.426, |
|
"step": 4334 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 4334, |
|
"total_flos": 1.2189453453533118e+19, |
|
"train_loss": 0.9622706794551732, |
|
"train_runtime": 38294.8783, |
|
"train_samples_per_second": 3.622, |
|
"train_steps_per_second": 0.113 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 4334, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.2189453453533118e+19, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|