{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.086340371904015,
  "eval_steps": 500,
  "global_step": 600000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0025719503099200123,
      "grad_norm": 1.539023995399475,
      "learning_rate": 5e-06,
      "loss": 10.2802,
      "step": 500
    },
    {
      "epoch": 0.005143900619840025,
      "grad_norm": 1.201340913772583,
      "learning_rate": 1e-05,
      "loss": 9.1163,
      "step": 1000
    },
    {
      "epoch": 0.007715850929760037,
      "grad_norm": 1.3188607692718506,
      "learning_rate": 1.5e-05,
      "loss": 8.454,
      "step": 1500
    },
    {
      "epoch": 0.01028780123968005,
      "grad_norm": 1.3608899116516113,
      "learning_rate": 2e-05,
      "loss": 8.0852,
      "step": 2000
    },
    {
      "epoch": 0.012859751549600063,
      "grad_norm": 1.5117723941802979,
      "learning_rate": 2.5e-05,
      "loss": 7.8255,
      "step": 2500
    },
    {
      "epoch": 0.015431701859520074,
      "grad_norm": 1.4807263612747192,
      "learning_rate": 3e-05,
      "loss": 7.6419,
      "step": 3000
    },
    {
      "epoch": 0.018003652169440085,
      "grad_norm": 1.9357377290725708,
      "learning_rate": 3.5e-05,
      "loss": 7.543,
      "step": 3500
    },
    {
      "epoch": 0.0205756024793601,
      "grad_norm": 1.4053044319152832,
      "learning_rate": 4e-05,
      "loss": 7.4304,
      "step": 4000
    },
    {
      "epoch": 0.023147552789280112,
      "grad_norm": 1.7819926738739014,
      "learning_rate": 4.5e-05,
      "loss": 7.3563,
      "step": 4500
    },
    {
      "epoch": 0.025719503099200125,
      "grad_norm": 1.5757050514221191,
      "learning_rate": 5e-05,
      "loss": 7.2987,
      "step": 5000
    },
    {
      "epoch": 0.028291453409120135,
      "grad_norm": 1.6142442226409912,
      "learning_rate": 5.500000000000001e-05,
      "loss": 7.237,
      "step": 5500
    },
    {
      "epoch": 0.030863403719040148,
      "grad_norm": 1.4982831478118896,
      "learning_rate": 6e-05,
      "loss": 7.1706,
      "step": 6000
    },
    {
      "epoch": 0.03343535402896016,
      "grad_norm": 2.2230963706970215,
      "learning_rate": 6.500000000000001e-05,
      "loss": 7.1074,
      "step": 6500
    },
    {
      "epoch": 0.03600730433888017,
      "grad_norm": 1.8124334812164307,
      "learning_rate": 7e-05,
      "loss": 7.0167,
      "step": 7000
    },
    {
      "epoch": 0.03857925464880019,
      "grad_norm": 2.228245258331299,
      "learning_rate": 7.500000000000001e-05,
      "loss": 6.9312,
      "step": 7500
    },
    {
      "epoch": 0.0411512049587202,
      "grad_norm": 2.270578145980835,
      "learning_rate": 8e-05,
      "loss": 6.8237,
      "step": 8000
    },
    {
      "epoch": 0.04372315526864021,
      "grad_norm": 2.2809226512908936,
      "learning_rate": 8.499e-05,
      "loss": 6.6923,
      "step": 8500
    },
    {
      "epoch": 0.046295105578560224,
      "grad_norm": 2.0048675537109375,
      "learning_rate": 8.999000000000001e-05,
      "loss": 6.552,
      "step": 9000
    },
    {
      "epoch": 0.04886705588848023,
      "grad_norm": 3.099470853805542,
      "learning_rate": 9.499e-05,
      "loss": 6.3139,
      "step": 9500
    },
    {
      "epoch": 0.05143900619840025,
      "grad_norm": 2.3764562606811523,
      "learning_rate": 9.999000000000001e-05,
      "loss": 5.9019,
      "step": 10000
    },
    {
      "epoch": 0.05401095650832026,
      "grad_norm": 2.565927743911743,
      "learning_rate": 9.994959595959596e-05,
      "loss": 5.3657,
      "step": 10500
    },
    {
      "epoch": 0.05658290681824027,
      "grad_norm": 2.563119649887085,
      "learning_rate": 9.989909090909091e-05,
      "loss": 5.0573,
      "step": 11000
    },
    {
      "epoch": 0.059154857128160286,
      "grad_norm": 2.43448805809021,
      "learning_rate": 9.984858585858586e-05,
      "loss": 4.8008,
      "step": 11500
    },
    {
      "epoch": 0.061726807438080296,
      "grad_norm": 2.7851388454437256,
      "learning_rate": 9.979808080808082e-05,
      "loss": 4.5791,
      "step": 12000
    },
    {
      "epoch": 0.06429875774800031,
      "grad_norm": 3.024442195892334,
      "learning_rate": 9.974757575757576e-05,
      "loss": 4.3963,
      "step": 12500
    },
    {
      "epoch": 0.06687070805792032,
      "grad_norm": 2.799959421157837,
      "learning_rate": 9.969717171717172e-05,
      "loss": 4.1974,
      "step": 13000
    },
    {
      "epoch": 0.06944265836784033,
      "grad_norm": 2.5713512897491455,
      "learning_rate": 9.964666666666667e-05,
      "loss": 4.0593,
      "step": 13500
    },
    {
      "epoch": 0.07201460867776034,
      "grad_norm": 2.840730905532837,
      "learning_rate": 9.959616161616162e-05,
      "loss": 3.9324,
      "step": 14000
    },
    {
      "epoch": 0.07458655898768035,
      "grad_norm": 2.8055996894836426,
      "learning_rate": 9.954565656565658e-05,
      "loss": 3.8384,
      "step": 14500
    },
    {
      "epoch": 0.07715850929760038,
      "grad_norm": 3.108902931213379,
      "learning_rate": 9.949525252525252e-05,
      "loss": 3.7442,
      "step": 15000
    },
    {
      "epoch": 0.07973045960752038,
      "grad_norm": 2.613213539123535,
      "learning_rate": 9.944474747474748e-05,
      "loss": 3.6902,
      "step": 15500
    },
    {
      "epoch": 0.0823024099174404,
      "grad_norm": 3.105239152908325,
      "learning_rate": 9.939424242424243e-05,
      "loss": 3.5897,
      "step": 16000
    },
    {
      "epoch": 0.0848743602273604,
      "grad_norm": 2.4152445793151855,
      "learning_rate": 9.934373737373737e-05,
      "loss": 3.5084,
      "step": 16500
    },
    {
      "epoch": 0.08744631053728041,
      "grad_norm": 2.6828176975250244,
      "learning_rate": 9.929333333333333e-05,
      "loss": 3.4708,
      "step": 17000
    },
    {
      "epoch": 0.09001826084720044,
      "grad_norm": 3.0051541328430176,
      "learning_rate": 9.92428282828283e-05,
      "loss": 3.3825,
      "step": 17500
    },
    {
      "epoch": 0.09259021115712045,
      "grad_norm": 2.8882691860198975,
      "learning_rate": 9.919232323232324e-05,
      "loss": 3.3456,
      "step": 18000
    },
    {
      "epoch": 0.09516216146704046,
      "grad_norm": 2.8528401851654053,
      "learning_rate": 9.914181818181819e-05,
      "loss": 3.3129,
      "step": 18500
    },
    {
      "epoch": 0.09773411177696047,
      "grad_norm": 2.7194721698760986,
      "learning_rate": 9.909141414141415e-05,
      "loss": 3.2803,
      "step": 19000
    },
    {
      "epoch": 0.10030606208688048,
      "grad_norm": 2.450242042541504,
      "learning_rate": 9.90410101010101e-05,
      "loss": 3.2348,
      "step": 19500
    },
    {
      "epoch": 0.1028780123968005,
      "grad_norm": 2.868133068084717,
      "learning_rate": 9.899050505050505e-05,
      "loss": 3.1772,
      "step": 20000
    },
    {
      "epoch": 0.10544996270672051,
      "grad_norm": 2.5029168128967285,
      "learning_rate": 9.894e-05,
      "loss": 3.1512,
      "step": 20500
    },
    {
      "epoch": 0.10802191301664052,
      "grad_norm": 2.388946771621704,
      "learning_rate": 9.888949494949496e-05,
      "loss": 3.1072,
      "step": 21000
    },
    {
      "epoch": 0.11059386332656053,
      "grad_norm": 2.435948371887207,
      "learning_rate": 9.88389898989899e-05,
      "loss": 3.1057,
      "step": 21500
    },
    {
      "epoch": 0.11316581363648054,
      "grad_norm": 2.745619058609009,
      "learning_rate": 9.878858585858586e-05,
      "loss": 3.0523,
      "step": 22000
    },
    {
      "epoch": 0.11573776394640055,
      "grad_norm": 2.6838150024414062,
      "learning_rate": 9.873808080808081e-05,
      "loss": 3.023,
      "step": 22500
    },
    {
      "epoch": 0.11830971425632057,
      "grad_norm": 2.8772313594818115,
      "learning_rate": 9.868757575757577e-05,
      "loss": 2.9742,
      "step": 23000
    },
    {
      "epoch": 0.12088166456624058,
      "grad_norm": 3.1740212440490723,
      "learning_rate": 9.863707070707072e-05,
      "loss": 2.9758,
      "step": 23500
    },
    {
      "epoch": 0.12345361487616059,
      "grad_norm": 3.2220029830932617,
      "learning_rate": 9.858656565656566e-05,
      "loss": 2.9479,
      "step": 24000
    },
    {
      "epoch": 0.1260255651860806,
      "grad_norm": 2.6701834201812744,
      "learning_rate": 9.853616161616162e-05,
      "loss": 2.9271,
      "step": 24500
    },
    {
      "epoch": 0.12859751549600063,
      "grad_norm": 2.7941513061523438,
      "learning_rate": 9.848565656565657e-05,
      "loss": 2.9261,
      "step": 25000
    },
    {
      "epoch": 0.13116946580592062,
      "grad_norm": 2.891564130783081,
      "learning_rate": 9.843515151515153e-05,
      "loss": 2.891,
      "step": 25500
    },
    {
      "epoch": 0.13374141611584064,
      "grad_norm": 2.8883216381073,
      "learning_rate": 9.838464646464647e-05,
      "loss": 2.8803,
      "step": 26000
    },
    {
      "epoch": 0.13631336642576067,
      "grad_norm": 2.4983842372894287,
      "learning_rate": 9.833424242424243e-05,
      "loss": 2.8642,
      "step": 26500
    },
    {
      "epoch": 0.13888531673568066,
      "grad_norm": 2.7084362506866455,
      "learning_rate": 9.828373737373738e-05,
      "loss": 2.8195,
      "step": 27000
    },
    {
      "epoch": 0.1414572670456007,
      "grad_norm": 2.29557204246521,
      "learning_rate": 9.823323232323233e-05,
      "loss": 2.814,
      "step": 27500
    },
    {
      "epoch": 0.14402921735552068,
      "grad_norm": 2.3679752349853516,
      "learning_rate": 9.818272727272729e-05,
      "loss": 2.7952,
      "step": 28000
    },
    {
      "epoch": 0.1466011676654407,
      "grad_norm": 2.6051392555236816,
      "learning_rate": 9.813232323232325e-05,
      "loss": 2.7855,
      "step": 28500
    },
    {
      "epoch": 0.1491731179753607,
      "grad_norm": 3.920278310775757,
      "learning_rate": 9.808181818181818e-05,
      "loss": 2.7721,
      "step": 29000
    },
    {
      "epoch": 0.15174506828528073,
      "grad_norm": 3.2232682704925537,
      "learning_rate": 9.803131313131314e-05,
      "loss": 2.7573,
      "step": 29500
    },
    {
      "epoch": 0.15431701859520075,
      "grad_norm": 2.551081418991089,
      "learning_rate": 9.798080808080809e-05,
      "loss": 2.7375,
      "step": 30000
    },
    {
      "epoch": 0.15688896890512075,
      "grad_norm": 2.425506114959717,
      "learning_rate": 9.793040404040405e-05,
      "loss": 2.7166,
      "step": 30500
    },
    {
      "epoch": 0.15946091921504077,
      "grad_norm": 2.9112095832824707,
      "learning_rate": 9.7879898989899e-05,
      "loss": 2.6943,
      "step": 31000
    },
    {
      "epoch": 0.16203286952496077,
      "grad_norm": 2.865812063217163,
      "learning_rate": 9.782939393939394e-05,
      "loss": 2.6932,
      "step": 31500
    },
    {
      "epoch": 0.1646048198348808,
      "grad_norm": 2.484619379043579,
      "learning_rate": 9.77788888888889e-05,
      "loss": 2.6781,
      "step": 32000
    },
    {
      "epoch": 0.1671767701448008,
      "grad_norm": 2.5136959552764893,
      "learning_rate": 9.772848484848486e-05,
      "loss": 2.6624,
      "step": 32500
    },
    {
      "epoch": 0.1697487204547208,
      "grad_norm": 3.022930860519409,
      "learning_rate": 9.76779797979798e-05,
      "loss": 2.6657,
      "step": 33000
    },
    {
      "epoch": 0.17232067076464083,
      "grad_norm": 2.9088263511657715,
      "learning_rate": 9.762747474747475e-05,
      "loss": 2.6475,
      "step": 33500
    },
    {
      "epoch": 0.17489262107456083,
      "grad_norm": 2.884895086288452,
      "learning_rate": 9.75769696969697e-05,
      "loss": 2.6192,
      "step": 34000
    },
    {
      "epoch": 0.17746457138448085,
      "grad_norm": 2.8403241634368896,
      "learning_rate": 9.752656565656566e-05,
      "loss": 2.6216,
      "step": 34500
    },
    {
      "epoch": 0.18003652169440088,
      "grad_norm": 2.7791824340820312,
      "learning_rate": 9.747606060606062e-05,
      "loss": 2.6003,
      "step": 35000
    },
    {
      "epoch": 0.18260847200432087,
      "grad_norm": 2.736762762069702,
      "learning_rate": 9.742555555555556e-05,
      "loss": 2.5981,
      "step": 35500
    },
    {
      "epoch": 0.1851804223142409,
      "grad_norm": 2.719017744064331,
      "learning_rate": 9.737505050505051e-05,
      "loss": 2.5924,
      "step": 36000
    },
    {
      "epoch": 0.1877523726241609,
      "grad_norm": 3.110269784927368,
      "learning_rate": 9.732464646464647e-05,
      "loss": 2.5829,
      "step": 36500
    },
    {
      "epoch": 0.19032432293408091,
      "grad_norm": 2.6917083263397217,
      "learning_rate": 9.727414141414141e-05,
      "loss": 2.569,
      "step": 37000
    },
    {
      "epoch": 0.19289627324400094,
      "grad_norm": 2.3601632118225098,
      "learning_rate": 9.722363636363637e-05,
      "loss": 2.5608,
      "step": 37500
    },
    {
      "epoch": 0.19546822355392093,
      "grad_norm": 2.266639232635498,
      "learning_rate": 9.717313131313132e-05,
      "loss": 2.5411,
      "step": 38000
    },
    {
      "epoch": 0.19804017386384096,
      "grad_norm": 3.149444818496704,
      "learning_rate": 9.712262626262627e-05,
      "loss": 2.5342,
      "step": 38500
    },
    {
      "epoch": 0.20061212417376095,
      "grad_norm": 2.4720096588134766,
      "learning_rate": 9.707222222222223e-05,
      "loss": 2.5222,
      "step": 39000
    },
    {
      "epoch": 0.20318407448368098,
      "grad_norm": 3.0014114379882812,
      "learning_rate": 9.702171717171717e-05,
      "loss": 2.5228,
      "step": 39500
    },
    {
      "epoch": 0.205756024793601,
      "grad_norm": 3.3219223022460938,
      "learning_rate": 9.697121212121213e-05,
      "loss": 2.5232,
      "step": 40000
    },
    {
      "epoch": 0.208327975103521,
      "grad_norm": 2.2936556339263916,
      "learning_rate": 9.692070707070708e-05,
      "loss": 2.5031,
      "step": 40500
    },
    {
      "epoch": 0.21089992541344102,
      "grad_norm": 2.9339241981506348,
      "learning_rate": 9.687030303030304e-05,
      "loss": 2.4811,
      "step": 41000
    },
    {
      "epoch": 0.21347187572336102,
      "grad_norm": 3.1717493534088135,
      "learning_rate": 9.681979797979799e-05,
      "loss": 2.4881,
      "step": 41500
    },
    {
      "epoch": 0.21604382603328104,
      "grad_norm": 3.3218414783477783,
      "learning_rate": 9.676929292929293e-05,
      "loss": 2.4928,
      "step": 42000
    },
    {
      "epoch": 0.21861577634320106,
      "grad_norm": 2.5804648399353027,
      "learning_rate": 9.671878787878789e-05,
      "loss": 2.49,
      "step": 42500
    },
    {
      "epoch": 0.22118772665312106,
      "grad_norm": 2.6406478881835938,
      "learning_rate": 9.666838383838385e-05,
      "loss": 2.4826,
      "step": 43000
    },
    {
      "epoch": 0.22375967696304108,
      "grad_norm": 2.9224679470062256,
      "learning_rate": 9.66178787878788e-05,
      "loss": 2.4601,
      "step": 43500
    },
    {
      "epoch": 0.22633162727296108,
      "grad_norm": 2.5592384338378906,
      "learning_rate": 9.656737373737374e-05,
      "loss": 2.4472,
      "step": 44000
    },
    {
      "epoch": 0.2289035775828811,
      "grad_norm": 2.8015081882476807,
      "learning_rate": 9.651686868686869e-05,
      "loss": 2.4617,
      "step": 44500
    },
    {
      "epoch": 0.2314755278928011,
      "grad_norm": 3.1833553314208984,
      "learning_rate": 9.646646464646465e-05,
      "loss": 2.4373,
      "step": 45000
    },
    {
      "epoch": 0.23404747820272112,
      "grad_norm": 2.631361961364746,
      "learning_rate": 9.641595959595961e-05,
      "loss": 2.4167,
      "step": 45500
    },
    {
      "epoch": 0.23661942851264114,
      "grad_norm": 2.7147443294525146,
      "learning_rate": 9.636545454545454e-05,
      "loss": 2.4266,
      "step": 46000
    },
    {
      "epoch": 0.23919137882256114,
      "grad_norm": 2.604551315307617,
      "learning_rate": 9.63149494949495e-05,
      "loss": 2.4089,
      "step": 46500
    },
    {
      "epoch": 0.24176332913248116,
      "grad_norm": 2.733030319213867,
      "learning_rate": 9.626454545454546e-05,
      "loss": 2.4003,
      "step": 47000
    },
    {
      "epoch": 0.24433527944240116,
      "grad_norm": 2.895327568054199,
      "learning_rate": 9.621404040404041e-05,
      "loss": 2.3862,
      "step": 47500
    },
    {
      "epoch": 0.24690722975232118,
      "grad_norm": 2.6326534748077393,
      "learning_rate": 9.616353535353535e-05,
      "loss": 2.4035,
      "step": 48000
    },
    {
      "epoch": 0.2494791800622412,
      "grad_norm": 2.8169047832489014,
      "learning_rate": 9.61130303030303e-05,
      "loss": 2.3894,
      "step": 48500
    },
    {
      "epoch": 0.2520511303721612,
      "grad_norm": 2.4093706607818604,
      "learning_rate": 9.606262626262626e-05,
      "loss": 2.3925,
      "step": 49000
    },
    {
      "epoch": 0.2546230806820812,
      "grad_norm": 2.373400926589966,
      "learning_rate": 9.601212121212122e-05,
      "loss": 2.385,
      "step": 49500
    },
    {
      "epoch": 0.25719503099200125,
      "grad_norm": 2.445448160171509,
      "learning_rate": 9.596161616161617e-05,
      "loss": 2.3762,
      "step": 50000
    },
    {
      "epoch": 0.2597669813019213,
      "grad_norm": 2.641312599182129,
      "learning_rate": 9.591111111111111e-05,
      "loss": 2.3798,
      "step": 50500
    },
    {
      "epoch": 0.26233893161184124,
      "grad_norm": 2.40631103515625,
      "learning_rate": 9.586060606060606e-05,
      "loss": 2.3656,
      "step": 51000
    },
    {
      "epoch": 0.26491088192176127,
      "grad_norm": 2.609057664871216,
      "learning_rate": 9.581020202020202e-05,
      "loss": 2.365,
      "step": 51500
    },
    {
      "epoch": 0.2674828322316813,
      "grad_norm": 2.9380626678466797,
      "learning_rate": 9.575969696969698e-05,
      "loss": 2.3512,
      "step": 52000
    },
    {
      "epoch": 0.2700547825416013,
      "grad_norm": 2.5909035205841064,
      "learning_rate": 9.570919191919193e-05,
      "loss": 2.3351,
      "step": 52500
    },
    {
      "epoch": 0.27262673285152134,
      "grad_norm": 2.4578676223754883,
      "learning_rate": 9.565868686868687e-05,
      "loss": 2.3516,
      "step": 53000
    },
    {
      "epoch": 0.2751986831614413,
      "grad_norm": 2.208662748336792,
      "learning_rate": 9.560828282828283e-05,
      "loss": 2.3376,
      "step": 53500
    },
    {
      "epoch": 0.27777063347136133,
      "grad_norm": 2.3018739223480225,
      "learning_rate": 9.555777777777778e-05,
      "loss": 2.3327,
      "step": 54000
    },
    {
      "epoch": 0.28034258378128135,
      "grad_norm": 3.107210159301758,
      "learning_rate": 9.550727272727274e-05,
      "loss": 2.3187,
      "step": 54500
    },
    {
      "epoch": 0.2829145340912014,
      "grad_norm": 2.857588052749634,
      "learning_rate": 9.545676767676768e-05,
      "loss": 2.3302,
      "step": 55000
    },
    {
      "epoch": 0.28548648440112134,
      "grad_norm": 2.459374189376831,
      "learning_rate": 9.540636363636364e-05,
      "loss": 2.3125,
      "step": 55500
    },
    {
      "epoch": 0.28805843471104137,
      "grad_norm": 2.3911349773406982,
      "learning_rate": 9.535585858585859e-05,
      "loss": 2.3184,
      "step": 56000
    },
    {
      "epoch": 0.2906303850209614,
      "grad_norm": 2.443305492401123,
      "learning_rate": 9.530535353535354e-05,
      "loss": 2.3133,
      "step": 56500
    },
    {
      "epoch": 0.2932023353308814,
      "grad_norm": 2.788959503173828,
      "learning_rate": 9.52548484848485e-05,
      "loss": 2.3109,
      "step": 57000
    },
    {
      "epoch": 0.29577428564080144,
      "grad_norm": 2.704943895339966,
      "learning_rate": 9.520444444444446e-05,
      "loss": 2.3016,
      "step": 57500
    },
    {
      "epoch": 0.2983462359507214,
      "grad_norm": 2.6149935722351074,
      "learning_rate": 9.51539393939394e-05,
      "loss": 2.2847,
      "step": 58000
    },
    {
      "epoch": 0.30091818626064143,
      "grad_norm": 2.797826051712036,
      "learning_rate": 9.510343434343435e-05,
      "loss": 2.294,
      "step": 58500
    },
    {
      "epoch": 0.30349013657056145,
      "grad_norm": 2.6312453746795654,
      "learning_rate": 9.50529292929293e-05,
      "loss": 2.2853,
      "step": 59000
    },
    {
      "epoch": 0.3060620868804815,
      "grad_norm": 2.364706039428711,
      "learning_rate": 9.500242424242425e-05,
      "loss": 2.274,
      "step": 59500
    },
    {
      "epoch": 0.3086340371904015,
      "grad_norm": 2.1500983238220215,
      "learning_rate": 9.495202020202021e-05,
      "loss": 2.2691,
      "step": 60000
    },
    {
      "epoch": 0.31120598750032147,
      "grad_norm": 2.4474480152130127,
      "learning_rate": 9.490151515151515e-05,
      "loss": 2.2765,
      "step": 60500
    },
    {
      "epoch": 0.3137779378102415,
      "grad_norm": 2.65130352973938,
      "learning_rate": 9.48510101010101e-05,
      "loss": 2.2525,
      "step": 61000
    },
    {
      "epoch": 0.3163498881201615,
      "grad_norm": 2.6861233711242676,
      "learning_rate": 9.480050505050505e-05,
      "loss": 2.2492,
      "step": 61500
    },
    {
      "epoch": 0.31892183843008154,
      "grad_norm": 3.0400047302246094,
      "learning_rate": 9.475010101010101e-05,
      "loss": 2.2565,
      "step": 62000
    },
    {
      "epoch": 0.32149378874000156,
      "grad_norm": 2.5578489303588867,
      "learning_rate": 9.469959595959597e-05,
      "loss": 2.2619,
      "step": 62500
    },
    {
      "epoch": 0.32406573904992153,
      "grad_norm": 2.904978036880493,
      "learning_rate": 9.46490909090909e-05,
      "loss": 2.2284,
      "step": 63000
    },
    {
      "epoch": 0.32663768935984155,
      "grad_norm": 2.79347562789917,
      "learning_rate": 9.459858585858586e-05,
      "loss": 2.2413,
      "step": 63500
    },
    {
      "epoch": 0.3292096396697616,
      "grad_norm": 2.5674405097961426,
      "learning_rate": 9.454808080808081e-05,
      "loss": 2.2501,
      "step": 64000
    },
    {
      "epoch": 0.3317815899796816,
      "grad_norm": 3.054811716079712,
      "learning_rate": 9.449767676767677e-05,
      "loss": 2.2433,
      "step": 64500
    },
    {
      "epoch": 0.3343535402896016,
      "grad_norm": 2.797732353210449,
      "learning_rate": 9.444717171717172e-05,
      "loss": 2.2285,
      "step": 65000
    },
    {
      "epoch": 0.3369254905995216,
      "grad_norm": 2.077179193496704,
      "learning_rate": 9.439666666666666e-05,
      "loss": 2.233,
      "step": 65500
    },
    {
      "epoch": 0.3394974409094416,
      "grad_norm": 2.7943077087402344,
      "learning_rate": 9.434616161616162e-05,
      "loss": 2.2336,
      "step": 66000
    },
    {
      "epoch": 0.34206939121936164,
      "grad_norm": 2.4715709686279297,
      "learning_rate": 9.429575757575758e-05,
      "loss": 2.2161,
      "step": 66500
    },
    {
      "epoch": 0.34464134152928166,
      "grad_norm": 2.578552484512329,
      "learning_rate": 9.424525252525253e-05,
      "loss": 2.2184,
      "step": 67000
    },
    {
      "epoch": 0.3472132918392017,
      "grad_norm": 2.8192737102508545,
      "learning_rate": 9.419474747474748e-05,
      "loss": 2.213,
      "step": 67500
    },
    {
      "epoch": 0.34978524214912166,
      "grad_norm": 2.719334125518799,
      "learning_rate": 9.414424242424242e-05,
      "loss": 2.2029,
      "step": 68000
    },
    {
      "epoch": 0.3523571924590417,
      "grad_norm": 2.509049892425537,
      "learning_rate": 9.409373737373738e-05,
      "loss": 2.2093,
      "step": 68500
    },
    {
      "epoch": 0.3549291427689617,
      "grad_norm": 2.238666296005249,
      "learning_rate": 9.404323232323233e-05,
      "loss": 2.2092,
      "step": 69000
    },
    {
      "epoch": 0.3575010930788817,
      "grad_norm": 2.2683796882629395,
      "learning_rate": 9.399272727272727e-05,
      "loss": 2.2011,
      "step": 69500
    },
    {
      "epoch": 0.36007304338880175,
      "grad_norm": 2.6098029613494873,
      "learning_rate": 9.394232323232323e-05,
      "loss": 2.1919,
      "step": 70000
    },
    {
      "epoch": 0.3626449936987217,
      "grad_norm": 2.656914234161377,
      "learning_rate": 9.389181818181818e-05,
      "loss": 2.2042,
      "step": 70500
    },
    {
      "epoch": 0.36521694400864174,
      "grad_norm": 2.753380298614502,
      "learning_rate": 9.384131313131314e-05,
      "loss": 2.1879,
      "step": 71000
    },
    {
      "epoch": 0.36778889431856177,
      "grad_norm": 2.4511659145355225,
      "learning_rate": 9.37909090909091e-05,
      "loss": 2.1984,
      "step": 71500
    },
    {
      "epoch": 0.3703608446284818,
      "grad_norm": 2.4932587146759033,
      "learning_rate": 9.374040404040403e-05,
      "loss": 2.1822,
      "step": 72000
    },
    {
      "epoch": 0.3729327949384018,
      "grad_norm": 2.8497045040130615,
      "learning_rate": 9.368989898989899e-05,
      "loss": 2.184,
      "step": 72500
    },
    {
      "epoch": 0.3755047452483218,
      "grad_norm": 2.5217344760894775,
      "learning_rate": 9.363939393939395e-05,
      "loss": 2.1765,
      "step": 73000
    },
    {
      "epoch": 0.3780766955582418,
      "grad_norm": 2.4461801052093506,
      "learning_rate": 9.35888888888889e-05,
      "loss": 2.174,
      "step": 73500
    },
    {
      "epoch": 0.38064864586816183,
      "grad_norm": 2.3911330699920654,
      "learning_rate": 9.353838383838385e-05,
      "loss": 2.1655,
      "step": 74000
    },
    {
      "epoch": 0.38322059617808185,
      "grad_norm": 2.4616994857788086,
      "learning_rate": 9.348787878787879e-05,
      "loss": 2.1657,
      "step": 74500
    },
    {
      "epoch": 0.3857925464880019,
      "grad_norm": 2.8872811794281006,
      "learning_rate": 9.343737373737375e-05,
      "loss": 2.1677,
      "step": 75000
    },
    {
      "epoch": 0.38836449679792184,
      "grad_norm": 2.5439906120300293,
      "learning_rate": 9.338686868686868e-05,
      "loss": 2.1727,
      "step": 75500
    },
    {
      "epoch": 0.39093644710784187,
      "grad_norm": 2.687584638595581,
      "learning_rate": 9.333636363636364e-05,
      "loss": 2.1606,
      "step": 76000
    },
    {
      "epoch": 0.3935083974177619,
      "grad_norm": 2.353545904159546,
      "learning_rate": 9.328585858585859e-05,
      "loss": 2.1468,
      "step": 76500
    },
    {
      "epoch": 0.3960803477276819,
      "grad_norm": 2.3765275478363037,
      "learning_rate": 9.323545454545455e-05,
      "loss": 2.1593,
      "step": 77000
    },
    {
      "epoch": 0.39865229803760194,
      "grad_norm": 2.507904052734375,
      "learning_rate": 9.31849494949495e-05,
      "loss": 2.1571,
      "step": 77500
    },
    {
      "epoch": 0.4012242483475219,
      "grad_norm": 2.3261361122131348,
      "learning_rate": 9.313444444444444e-05,
      "loss": 2.1511,
      "step": 78000
    },
    {
      "epoch": 0.40379619865744193,
      "grad_norm": 2.7640092372894287,
      "learning_rate": 9.30839393939394e-05,
      "loss": 2.1538,
      "step": 78500
    },
    {
      "epoch": 0.40636814896736195,
      "grad_norm": 2.9779064655303955,
      "learning_rate": 9.303343434343435e-05,
      "loss": 2.1464,
      "step": 79000
    },
    {
      "epoch": 0.408940099277282,
      "grad_norm": 2.406595468521118,
      "learning_rate": 9.298303030303031e-05,
      "loss": 2.141,
      "step": 79500
    },
    {
      "epoch": 0.411512049587202,
      "grad_norm": 2.4242331981658936,
      "learning_rate": 9.293252525252526e-05,
      "loss": 2.1457,
      "step": 80000
    },
    {
      "epoch": 0.41408399989712197,
      "grad_norm": 3.289874315261841,
      "learning_rate": 9.28820202020202e-05,
      "loss": 2.1143,
      "step": 80500
    },
    {
      "epoch": 0.416655950207042,
      "grad_norm": 2.4861912727355957,
      "learning_rate": 9.283151515151516e-05,
      "loss": 2.1366,
      "step": 81000
    },
    {
      "epoch": 0.419227900516962,
      "grad_norm": 2.6344797611236572,
      "learning_rate": 9.278101010101011e-05,
      "loss": 2.1232,
      "step": 81500
    },
    {
      "epoch": 0.42179985082688204,
      "grad_norm": 2.6407787799835205,
      "learning_rate": 9.273060606060607e-05,
      "loss": 2.1326,
      "step": 82000
    },
    {
      "epoch": 0.42437180113680206,
      "grad_norm": 2.258902072906494,
      "learning_rate": 9.268010101010101e-05,
      "loss": 2.1315,
      "step": 82500
    },
    {
      "epoch": 0.42694375144672203,
      "grad_norm": 2.644082546234131,
      "learning_rate": 9.262959595959596e-05,
      "loss": 2.1234,
      "step": 83000
    },
    {
      "epoch": 0.42951570175664205,
      "grad_norm": 2.3079209327697754,
      "learning_rate": 9.257909090909092e-05,
      "loss": 2.1095,
      "step": 83500
    },
    {
      "epoch": 0.4320876520665621,
      "grad_norm": 2.5316202640533447,
      "learning_rate": 9.252858585858585e-05,
      "loss": 2.1202,
      "step": 84000
    },
    {
      "epoch": 0.4346596023764821,
      "grad_norm": 2.330894708633423,
      "learning_rate": 9.247808080808081e-05,
      "loss": 2.0964,
      "step": 84500
    },
    {
      "epoch": 0.4372315526864021,
      "grad_norm": 2.368502140045166,
      "learning_rate": 9.242757575757576e-05,
      "loss": 2.1111,
      "step": 85000
    },
    {
      "epoch": 0.4398035029963221,
      "grad_norm": 2.195286512374878,
      "learning_rate": 9.237707070707072e-05,
      "loss": 2.1045,
      "step": 85500
    },
    {
      "epoch": 0.4423754533062421,
      "grad_norm": 2.3127996921539307,
      "learning_rate": 9.232676767676768e-05,
      "loss": 2.1115,
      "step": 86000
    },
    {
      "epoch": 0.44494740361616214,
      "grad_norm": 2.3976833820343018,
      "learning_rate": 9.227626262626264e-05,
      "loss": 2.1074,
      "step": 86500
    },
    {
      "epoch": 0.44751935392608216,
      "grad_norm": 2.5539162158966064,
      "learning_rate": 9.222575757575757e-05,
      "loss": 2.0948,
      "step": 87000
    },
    {
      "epoch": 0.4500913042360022,
      "grad_norm": 2.6165366172790527,
      "learning_rate": 9.217525252525253e-05,
      "loss": 2.1028,
      "step": 87500
    },
    {
      "epoch": 0.45266325454592216,
      "grad_norm": 2.407905101776123,
      "learning_rate": 9.212484848484849e-05,
      "loss": 2.0883,
      "step": 88000
    },
    {
      "epoch": 0.4552352048558422,
      "grad_norm": 2.6928274631500244,
      "learning_rate": 9.207434343434344e-05,
      "loss": 2.0961,
      "step": 88500
    },
    {
      "epoch": 0.4578071551657622,
      "grad_norm": 2.3398540019989014,
      "learning_rate": 9.20238383838384e-05,
      "loss": 2.0902,
      "step": 89000
    },
    {
      "epoch": 0.4603791054756822,
      "grad_norm": 2.5254998207092285,
      "learning_rate": 9.197333333333333e-05,
      "loss": 2.0807,
      "step": 89500
    },
    {
      "epoch": 0.4629510557856022,
      "grad_norm": 2.4837093353271484,
      "learning_rate": 9.192292929292929e-05,
      "loss": 2.0843,
      "step": 90000
    },
    {
      "epoch": 0.4655230060955222,
      "grad_norm": 2.218877077102661,
      "learning_rate": 9.187242424242425e-05,
      "loss": 2.0727,
      "step": 90500
    },
    {
      "epoch": 0.46809495640544224,
      "grad_norm": 2.3560657501220703,
      "learning_rate": 9.18219191919192e-05,
      "loss": 2.0905,
      "step": 91000
    },
    {
      "epoch": 0.47066690671536227,
      "grad_norm": 2.2257909774780273,
      "learning_rate": 9.177141414141414e-05,
      "loss": 2.0674,
      "step": 91500
    },
    {
      "epoch": 0.4732388570252823,
      "grad_norm": 2.5964746475219727,
      "learning_rate": 9.172090909090909e-05,
      "loss": 2.0798,
      "step": 92000
    },
    {
      "epoch": 0.47581080733520226,
      "grad_norm": 3.076167345046997,
      "learning_rate": 9.167050505050505e-05,
      "loss": 2.0814,
      "step": 92500
    },
    {
      "epoch": 0.4783827576451223,
      "grad_norm": 2.514014482498169,
      "learning_rate": 9.162000000000001e-05,
      "loss": 2.0795,
      "step": 93000
    },
    {
      "epoch": 0.4809547079550423,
      "grad_norm": 2.6532745361328125,
      "learning_rate": 9.156949494949495e-05,
      "loss": 2.0626,
      "step": 93500
    },
    {
      "epoch": 0.48352665826496233,
      "grad_norm": 2.583951950073242,
      "learning_rate": 9.15189898989899e-05,
      "loss": 2.0759,
      "step": 94000
    },
    {
      "epoch": 0.48609860857488235,
      "grad_norm": 2.2346367835998535,
      "learning_rate": 9.146858585858586e-05,
      "loss": 2.0581,
      "step": 94500
    },
    {
      "epoch": 0.4886705588848023,
      "grad_norm": 2.8640918731689453,
      "learning_rate": 9.141818181818182e-05,
      "loss": 2.0758,
      "step": 95000
    },
    {
      "epoch": 0.49124250919472234,
      "grad_norm": 2.441415548324585,
      "learning_rate": 9.136767676767677e-05,
      "loss": 2.0634,
      "step": 95500
    },
    {
      "epoch": 0.49381445950464237,
      "grad_norm": 2.3812661170959473,
      "learning_rate": 9.131717171717173e-05,
      "loss": 2.0711,
      "step": 96000
    },
    {
      "epoch": 0.4963864098145624,
      "grad_norm": 2.4360954761505127,
      "learning_rate": 9.126666666666667e-05,
      "loss": 2.0472,
      "step": 96500
    },
    {
      "epoch": 0.4989583601244824,
      "grad_norm": 2.4400837421417236,
      "learning_rate": 9.121616161616162e-05,
      "loss": 2.0556,
      "step": 97000
    },
    {
      "epoch": 0.5015303104344024,
      "grad_norm": 2.2711386680603027,
      "learning_rate": 9.116565656565656e-05,
      "loss": 2.0581,
      "step": 97500
    },
    {
      "epoch": 0.5041022607443224,
      "grad_norm": 2.3467113971710205,
      "learning_rate": 9.111515151515152e-05,
      "loss": 2.0642,
      "step": 98000
    },
    {
      "epoch": 0.5066742110542425,
      "grad_norm": 2.3878612518310547,
      "learning_rate": 9.106464646464646e-05,
      "loss": 2.0561,
      "step": 98500
    },
    {
      "epoch": 0.5092461613641625,
      "grad_norm": 2.205702066421509,
      "learning_rate": 9.101424242424243e-05,
      "loss": 2.0618,
      "step": 99000
    },
    {
      "epoch": 0.5118181116740824,
      "grad_norm": 2.889329195022583,
      "learning_rate": 9.096373737373738e-05,
      "loss": 2.0457,
      "step": 99500
    },
    {
      "epoch": 0.5143900619840025,
      "grad_norm": 2.655266761779785,
      "learning_rate": 9.091323232323232e-05,
      "loss": 2.036,
      "step": 100000
    },
    {
      "epoch": 0.5169620122939225,
      "grad_norm": 2.392587900161743,
      "learning_rate": 9.086272727272728e-05,
      "loss": 2.0485,
      "step": 100500
    },
    {
      "epoch": 0.5195339626038425,
      "grad_norm": 2.4802868366241455,
      "learning_rate": 9.081242424242424e-05,
      "loss": 2.0478,
      "step": 101000
    },
    {
      "epoch": 0.5221059129137625,
      "grad_norm": 2.0449373722076416,
      "learning_rate": 9.07619191919192e-05,
      "loss": 2.0337,
      "step": 101500
    },
    {
      "epoch": 0.5246778632236825,
      "grad_norm": 2.384089946746826,
      "learning_rate": 9.071141414141415e-05,
      "loss": 2.0171,
      "step": 102000
    },
    {
      "epoch": 0.5272498135336026,
      "grad_norm": 2.4087131023406982,
      "learning_rate": 9.06609090909091e-05,
      "loss": 2.0412,
      "step": 102500
    },
    {
      "epoch": 0.5298217638435225,
      "grad_norm": 2.774549961090088,
      "learning_rate": 9.061040404040404e-05,
      "loss": 2.0364,
      "step": 103000
    },
    {
      "epoch": 0.5323937141534426,
      "grad_norm": 2.3224120140075684,
      "learning_rate": 9.0559898989899e-05,
      "loss": 2.0317,
      "step": 103500
    },
    {
      "epoch": 0.5349656644633626,
      "grad_norm": 2.3578784465789795,
      "learning_rate": 9.050939393939393e-05,
      "loss": 2.0221,
      "step": 104000
    },
    {
      "epoch": 0.5375376147732825,
      "grad_norm": 2.2884316444396973,
      "learning_rate": 9.04588888888889e-05,
      "loss": 2.0284,
      "step": 104500
    },
    {
      "epoch": 0.5401095650832026,
      "grad_norm": 2.560002326965332,
      "learning_rate": 9.040838383838385e-05,
      "loss": 2.0176,
      "step": 105000
    },
    {
      "epoch": 0.5426815153931226,
      "grad_norm": 2.5888469219207764,
      "learning_rate": 9.035787878787879e-05,
      "loss": 2.0265,
      "step": 105500
    },
    {
      "epoch": 0.5452534657030427,
      "grad_norm": 2.30547833442688,
      "learning_rate": 9.030737373737375e-05,
      "loss": 2.0285,
      "step": 106000
    },
    {
      "epoch": 0.5478254160129626,
      "grad_norm": 2.7285144329071045,
      "learning_rate": 9.02569696969697e-05,
      "loss": 2.0138,
      "step": 106500
    },
    {
      "epoch": 0.5503973663228826,
      "grad_norm": 2.7011213302612305,
      "learning_rate": 9.020646464646465e-05,
      "loss": 2.0173,
      "step": 107000
    },
    {
      "epoch": 0.5529693166328027,
      "grad_norm": 2.5760533809661865,
      "learning_rate": 9.01559595959596e-05,
      "loss": 2.0091,
      "step": 107500
    },
    {
      "epoch": 0.5555412669427227,
      "grad_norm": 2.6803488731384277,
      "learning_rate": 9.010545454545454e-05,
      "loss": 2.0199,
      "step": 108000
    },
    {
      "epoch": 0.5581132172526426,
      "grad_norm": 2.3978312015533447,
      "learning_rate": 9.00549494949495e-05,
      "loss": 2.0109,
      "step": 108500
    },
    {
      "epoch": 0.5606851675625627,
      "grad_norm": 2.530170202255249,
      "learning_rate": 9.000454545454546e-05,
      "loss": 2.0085,
      "step": 109000
    },
    {
      "epoch": 0.5632571178724827,
      "grad_norm": 2.43276309967041,
      "learning_rate": 8.995404040404041e-05,
      "loss": 1.9883,
      "step": 109500
    },
    {
      "epoch": 0.5658290681824028,
      "grad_norm": 2.404324531555176,
      "learning_rate": 8.990353535353536e-05,
      "loss": 1.9948,
      "step": 110000
    },
    {
      "epoch": 0.5684010184923227,
      "grad_norm": 2.732954740524292,
      "learning_rate": 8.98530303030303e-05,
      "loss": 1.993,
      "step": 110500
    },
    {
      "epoch": 0.5709729688022427,
      "grad_norm": 2.43375563621521,
      "learning_rate": 8.980262626262626e-05,
      "loss": 2.0089,
      "step": 111000
    },
    {
      "epoch": 0.5735449191121628,
      "grad_norm": 2.824575662612915,
      "learning_rate": 8.975212121212122e-05,
      "loss": 2.0035,
      "step": 111500
    },
    {
      "epoch": 0.5761168694220827,
      "grad_norm": 2.329143524169922,
      "learning_rate": 8.970161616161617e-05,
      "loss": 1.9987,
      "step": 112000
    },
    {
      "epoch": 0.5786888197320028,
      "grad_norm": 2.400956392288208,
      "learning_rate": 8.965111111111112e-05,
      "loss": 1.9997,
      "step": 112500
    },
    {
      "epoch": 0.5812607700419228,
      "grad_norm": 2.737149953842163,
      "learning_rate": 8.960070707070707e-05,
      "loss": 1.9925,
      "step": 113000
    },
    {
      "epoch": 0.5838327203518427,
      "grad_norm": 2.3448445796966553,
      "learning_rate": 8.955020202020202e-05,
      "loss": 2.0024,
      "step": 113500
    },
    {
      "epoch": 0.5864046706617628,
      "grad_norm": 2.8817691802978516,
      "learning_rate": 8.949969696969698e-05,
      "loss": 1.9896,
      "step": 114000
    },
    {
      "epoch": 0.5889766209716828,
      "grad_norm": 2.6253979206085205,
      "learning_rate": 8.944929292929294e-05,
      "loss": 1.985,
      "step": 114500
    },
    {
      "epoch": 0.5915485712816029,
      "grad_norm": 2.2400150299072266,
      "learning_rate": 8.939878787878789e-05,
      "loss": 1.9898,
      "step": 115000
    },
    {
      "epoch": 0.5941205215915228,
      "grad_norm": 2.847470760345459,
      "learning_rate": 8.934828282828283e-05,
      "loss": 1.9939,
      "step": 115500
    },
    {
      "epoch": 0.5966924719014428,
      "grad_norm": 3.0489280223846436,
      "learning_rate": 8.929777777777778e-05,
      "loss": 1.9968,
      "step": 116000
    },
    {
      "epoch": 0.5992644222113629,
      "grad_norm": 2.2937958240509033,
      "learning_rate": 8.924727272727274e-05,
      "loss": 1.9976,
      "step": 116500
    },
    {
      "epoch": 0.6018363725212829,
      "grad_norm": 2.5017504692077637,
      "learning_rate": 8.919676767676767e-05,
      "loss": 1.9912,
      "step": 117000
    },
    {
      "epoch": 0.6044083228312029,
      "grad_norm": 2.5178418159484863,
      "learning_rate": 8.914626262626263e-05,
      "loss": 1.9854,
      "step": 117500
    },
    {
      "epoch": 0.6069802731411229,
      "grad_norm": 2.754523515701294,
      "learning_rate": 8.909575757575758e-05,
      "loss": 1.9814,
      "step": 118000
    },
    {
      "epoch": 0.6095522234510429,
      "grad_norm": 2.4813973903656006,
      "learning_rate": 8.904525252525252e-05,
      "loss": 1.994,
      "step": 118500
    },
    {
      "epoch": 0.612124173760963,
      "grad_norm": 2.0074260234832764,
      "learning_rate": 8.89948484848485e-05,
      "loss": 1.9778,
      "step": 119000
    },
    {
      "epoch": 0.6146961240708829,
      "grad_norm": 2.4869885444641113,
      "learning_rate": 8.894434343434343e-05,
      "loss": 1.9809,
      "step": 119500
    },
    {
      "epoch": 0.617268074380803,
      "grad_norm": 2.464909315109253,
      "learning_rate": 8.889383838383839e-05,
      "loss": 1.9671,
      "step": 120000
    },
    {
      "epoch": 0.619840024690723,
      "grad_norm": 2.330047130584717,
      "learning_rate": 8.884333333333334e-05,
      "loss": 1.9712,
      "step": 120500
    },
    {
      "epoch": 0.6224119750006429,
      "grad_norm": 2.894199848175049,
      "learning_rate": 8.87929292929293e-05,
      "loss": 1.9747,
      "step": 121000
    },
    {
      "epoch": 0.624983925310563,
      "grad_norm": 2.962379217147827,
      "learning_rate": 8.874242424242424e-05,
      "loss": 1.9689,
      "step": 121500
    },
    {
      "epoch": 0.627555875620483,
      "grad_norm": 3.0637989044189453,
      "learning_rate": 8.869191919191919e-05,
      "loss": 1.967,
      "step": 122000
    },
    {
      "epoch": 0.6301278259304031,
      "grad_norm": 2.25830078125,
      "learning_rate": 8.864141414141415e-05,
      "loss": 1.9635,
      "step": 122500
    },
    {
      "epoch": 0.632699776240323,
      "grad_norm": 2.3451120853424072,
      "learning_rate": 8.85909090909091e-05,
      "loss": 1.9664,
      "step": 123000
    },
    {
      "epoch": 0.635271726550243,
      "grad_norm": 2.26731538772583,
      "learning_rate": 8.854050505050506e-05,
      "loss": 1.9561,
      "step": 123500
    },
    {
      "epoch": 0.6378436768601631,
      "grad_norm": 2.3904566764831543,
      "learning_rate": 8.849e-05,
      "loss": 1.9619,
      "step": 124000
    },
    {
      "epoch": 0.640415627170083,
      "grad_norm": 2.415607213973999,
      "learning_rate": 8.843949494949495e-05,
      "loss": 1.9748,
      "step": 124500
    },
    {
      "epoch": 0.6429875774800031,
      "grad_norm": 2.9378740787506104,
      "learning_rate": 8.838898989898991e-05,
      "loss": 1.9602,
      "step": 125000
    },
    {
      "epoch": 0.6455595277899231,
      "grad_norm": 2.1163997650146484,
      "learning_rate": 8.833858585858587e-05,
      "loss": 1.9498,
      "step": 125500
    },
    {
      "epoch": 0.6481314780998431,
      "grad_norm": 2.2119147777557373,
      "learning_rate": 8.828818181818183e-05,
      "loss": 1.9623,
      "step": 126000
    },
    {
      "epoch": 0.6507034284097631,
      "grad_norm": 3.078888416290283,
      "learning_rate": 8.823767676767677e-05,
      "loss": 1.9501,
      "step": 126500
    },
    {
      "epoch": 0.6532753787196831,
      "grad_norm": 3.1210856437683105,
      "learning_rate": 8.818717171717172e-05,
      "loss": 1.963,
      "step": 127000
    },
    {
      "epoch": 0.6558473290296032,
      "grad_norm": 2.1710915565490723,
      "learning_rate": 8.813666666666667e-05,
      "loss": 1.9418,
      "step": 127500
    },
    {
      "epoch": 0.6584192793395232,
      "grad_norm": 2.1447181701660156,
      "learning_rate": 8.808616161616163e-05,
      "loss": 1.9669,
      "step": 128000
    },
    {
      "epoch": 0.6609912296494431,
      "grad_norm": 3.214812994003296,
      "learning_rate": 8.803565656565657e-05,
      "loss": 1.9356,
      "step": 128500
    },
    {
      "epoch": 0.6635631799593632,
      "grad_norm": 2.4240269660949707,
      "learning_rate": 8.798515151515152e-05,
      "loss": 1.9637,
      "step": 129000
    },
    {
      "epoch": 0.6661351302692832,
      "grad_norm": 2.5283048152923584,
      "learning_rate": 8.793474747474748e-05,
      "loss": 1.948,
      "step": 129500
    },
    {
      "epoch": 0.6687070805792033,
      "grad_norm": 2.215092182159424,
      "learning_rate": 8.788424242424242e-05,
      "loss": 1.9401,
      "step": 130000
    },
    {
      "epoch": 0.6712790308891232,
      "grad_norm": 2.387033462524414,
      "learning_rate": 8.783373737373738e-05,
      "loss": 1.9403,
      "step": 130500
    },
    {
      "epoch": 0.6738509811990432,
      "grad_norm": 2.3272926807403564,
      "learning_rate": 8.778323232323232e-05,
      "loss": 1.9288,
      "step": 131000
    },
    {
      "epoch": 0.6764229315089633,
      "grad_norm": 2.4151089191436768,
      "learning_rate": 8.773272727272728e-05,
      "loss": 1.9528,
      "step": 131500
    },
    {
      "epoch": 0.6789948818188832,
      "grad_norm": 2.122108221054077,
      "learning_rate": 8.768222222222222e-05,
      "loss": 1.9403,
      "step": 132000
    },
    {
      "epoch": 0.6815668321288033,
      "grad_norm": 2.8606338500976562,
      "learning_rate": 8.763171717171717e-05,
      "loss": 1.938,
      "step": 132500
    },
    {
      "epoch": 0.6841387824387233,
      "grad_norm": 2.832679510116577,
      "learning_rate": 8.758121212121213e-05,
      "loss": 1.9498,
      "step": 133000
    },
    {
      "epoch": 0.6867107327486432,
      "grad_norm": 2.884164571762085,
      "learning_rate": 8.753080808080808e-05,
      "loss": 1.9339,
      "step": 133500
    },
    {
      "epoch": 0.6892826830585633,
      "grad_norm": 2.577549457550049,
      "learning_rate": 8.748030303030304e-05,
      "loss": 1.9456,
      "step": 134000
    },
    {
      "epoch": 0.6918546333684833,
      "grad_norm": 2.42988920211792,
      "learning_rate": 8.7429797979798e-05,
      "loss": 1.9448,
      "step": 134500
    },
    {
      "epoch": 0.6944265836784034,
      "grad_norm": 2.5420963764190674,
      "learning_rate": 8.737929292929293e-05,
      "loss": 1.9261,
      "step": 135000
    },
    {
      "epoch": 0.6969985339883233,
      "grad_norm": 2.6064467430114746,
      "learning_rate": 8.732888888888889e-05,
      "loss": 1.9288,
      "step": 135500
    },
    {
      "epoch": 0.6995704842982433,
      "grad_norm": 2.149203062057495,
      "learning_rate": 8.727838383838383e-05,
      "loss": 1.9246,
      "step": 136000
    },
    {
      "epoch": 0.7021424346081634,
      "grad_norm": 2.064519166946411,
      "learning_rate": 8.72278787878788e-05,
      "loss": 1.9379,
      "step": 136500
    },
    {
      "epoch": 0.7047143849180834,
      "grad_norm": 2.159180164337158,
      "learning_rate": 8.717737373737374e-05,
      "loss": 1.9389,
      "step": 137000
    },
    {
      "epoch": 0.7072863352280034,
      "grad_norm": 2.478998899459839,
      "learning_rate": 8.71269696969697e-05,
      "loss": 1.9331,
      "step": 137500
    },
    {
      "epoch": 0.7098582855379234,
      "grad_norm": 2.2875208854675293,
      "learning_rate": 8.707646464646465e-05,
      "loss": 1.9263,
      "step": 138000
    },
    {
      "epoch": 0.7124302358478434,
      "grad_norm": 2.595557928085327,
      "learning_rate": 8.70259595959596e-05,
      "loss": 1.9253,
      "step": 138500
    },
    {
      "epoch": 0.7150021861577635,
      "grad_norm": 2.872157573699951,
      "learning_rate": 8.697545454545455e-05,
      "loss": 1.9149,
      "step": 139000
    },
    {
      "epoch": 0.7175741364676834,
      "grad_norm": 2.4363973140716553,
      "learning_rate": 8.69249494949495e-05,
      "loss": 1.9164,
      "step": 139500
    },
    {
      "epoch": 0.7201460867776035,
      "grad_norm": 2.8040812015533447,
      "learning_rate": 8.687444444444445e-05,
      "loss": 1.9134,
      "step": 140000
    },
    {
      "epoch": 0.7227180370875235,
      "grad_norm": 2.6890177726745605,
      "learning_rate": 8.68239393939394e-05,
      "loss": 1.9223,
      "step": 140500
    },
    {
      "epoch": 0.7252899873974434,
      "grad_norm": 2.4290647506713867,
      "learning_rate": 8.677343434343435e-05,
      "loss": 1.9172,
      "step": 141000
    },
    {
      "epoch": 0.7278619377073635,
      "grad_norm": 2.398864984512329,
      "learning_rate": 8.672303030303031e-05,
      "loss": 1.907,
      "step": 141500
    },
    {
      "epoch": 0.7304338880172835,
      "grad_norm": 2.4179599285125732,
      "learning_rate": 8.667252525252526e-05,
      "loss": 1.9108,
      "step": 142000
    },
    {
      "epoch": 0.7330058383272036,
      "grad_norm": 2.6131629943847656,
      "learning_rate": 8.66220202020202e-05,
      "loss": 1.9067,
      "step": 142500
    },
    {
      "epoch": 0.7355777886371235,
      "grad_norm": 2.302748203277588,
      "learning_rate": 8.657161616161616e-05,
      "loss": 1.9104,
      "step": 143000
    },
    {
      "epoch": 0.7381497389470435,
      "grad_norm": 2.1994614601135254,
      "learning_rate": 8.652111111111112e-05,
      "loss": 1.9173,
      "step": 143500
    },
    {
      "epoch": 0.7407216892569636,
      "grad_norm": 2.1997227668762207,
      "learning_rate": 8.647060606060607e-05,
      "loss": 1.9001,
      "step": 144000
    },
    {
      "epoch": 0.7432936395668835,
      "grad_norm": 2.480407953262329,
      "learning_rate": 8.642010101010102e-05,
      "loss": 1.9153,
      "step": 144500
    },
    {
      "epoch": 0.7458655898768036,
      "grad_norm": 2.447983503341675,
      "learning_rate": 8.636959595959596e-05,
      "loss": 1.9147,
      "step": 145000
    },
    {
      "epoch": 0.7484375401867236,
      "grad_norm": 2.3080880641937256,
      "learning_rate": 8.631919191919192e-05,
      "loss": 1.919,
      "step": 145500
    },
    {
      "epoch": 0.7510094904966436,
      "grad_norm": 2.5869462490081787,
      "learning_rate": 8.626868686868688e-05,
      "loss": 1.9078,
      "step": 146000
    },
    {
      "epoch": 0.7535814408065636,
      "grad_norm": 2.248598098754883,
      "learning_rate": 8.621818181818181e-05,
      "loss": 1.9036,
      "step": 146500
    },
    {
      "epoch": 0.7561533911164836,
      "grad_norm": 2.336503267288208,
      "learning_rate": 8.616767676767677e-05,
      "loss": 1.9049,
      "step": 147000
    },
    {
      "epoch": 0.7587253414264037,
      "grad_norm": 2.6740052700042725,
      "learning_rate": 8.611717171717172e-05,
      "loss": 1.8945,
      "step": 147500
    },
    {
      "epoch": 0.7612972917363237,
      "grad_norm": 2.3795812129974365,
      "learning_rate": 8.606676767676768e-05,
      "loss": 1.8985,
      "step": 148000
    },
    {
      "epoch": 0.7638692420462436,
      "grad_norm": 2.3991169929504395,
      "learning_rate": 8.601626262626264e-05,
      "loss": 1.8997,
      "step": 148500
    },
    {
      "epoch": 0.7664411923561637,
      "grad_norm": 2.6228420734405518,
      "learning_rate": 8.596575757575757e-05,
      "loss": 1.8892,
      "step": 149000
    },
    {
      "epoch": 0.7690131426660837,
      "grad_norm": 2.6543805599212646,
      "learning_rate": 8.591525252525253e-05,
      "loss": 1.9133,
      "step": 149500
    },
    {
      "epoch": 0.7715850929760038,
      "grad_norm": 2.5980093479156494,
      "learning_rate": 8.586474747474748e-05,
      "loss": 1.8999,
      "step": 150000
    },
    {
      "epoch": 0.7741570432859237,
      "grad_norm": 2.239975690841675,
      "learning_rate": 8.581434343434344e-05,
      "loss": 1.9011,
      "step": 150500
    },
    {
      "epoch": 0.7767289935958437,
      "grad_norm": 2.4112389087677,
      "learning_rate": 8.576383838383839e-05,
      "loss": 1.8845,
      "step": 151000
    },
    {
      "epoch": 0.7793009439057638,
      "grad_norm": 2.379509210586548,
      "learning_rate": 8.571333333333333e-05,
      "loss": 1.896,
      "step": 151500
    },
    {
      "epoch": 0.7818728942156837,
      "grad_norm": 2.4327831268310547,
      "learning_rate": 8.566282828282829e-05,
      "loss": 1.8935,
      "step": 152000
    },
    {
      "epoch": 0.7844448445256038,
      "grad_norm": 2.5598642826080322,
      "learning_rate": 8.561232323232324e-05,
      "loss": 1.8996,
      "step": 152500
    },
    {
      "epoch": 0.7870167948355238,
      "grad_norm": 2.7298407554626465,
      "learning_rate": 8.556181818181818e-05,
      "loss": 1.8954,
      "step": 153000
    },
    {
      "epoch": 0.7895887451454437,
      "grad_norm": 2.6706230640411377,
      "learning_rate": 8.551131313131313e-05,
      "loss": 1.8865,
      "step": 153500
    },
    {
      "epoch": 0.7921606954553638,
      "grad_norm": 2.7836761474609375,
      "learning_rate": 8.546080808080809e-05,
      "loss": 1.8922,
      "step": 154000
    },
    {
      "epoch": 0.7947326457652838,
      "grad_norm": 2.4677138328552246,
      "learning_rate": 8.541040404040405e-05,
      "loss": 1.8744,
      "step": 154500
    },
    {
      "epoch": 0.7973045960752039,
      "grad_norm": 2.629953384399414,
      "learning_rate": 8.5359898989899e-05,
      "loss": 1.8801,
      "step": 155000
    },
    {
      "epoch": 0.7998765463851238,
      "grad_norm": 2.1538336277008057,
      "learning_rate": 8.530939393939394e-05,
      "loss": 1.8766,
      "step": 155500
    },
    {
      "epoch": 0.8024484966950438,
      "grad_norm": 2.37500262260437,
      "learning_rate": 8.525888888888889e-05,
      "loss": 1.8827,
      "step": 156000
    },
    {
      "epoch": 0.8050204470049639,
      "grad_norm": 2.6441307067871094,
      "learning_rate": 8.520848484848485e-05,
      "loss": 1.8739,
      "step": 156500
    },
    {
      "epoch": 0.8075923973148839,
      "grad_norm": 2.8131062984466553,
      "learning_rate": 8.515797979797981e-05,
      "loss": 1.8777,
      "step": 157000
    },
    {
      "epoch": 0.8101643476248039,
      "grad_norm": 2.25876784324646,
      "learning_rate": 8.510757575757577e-05,
      "loss": 1.891,
      "step": 157500
    },
    {
      "epoch": 0.8127362979347239,
      "grad_norm": 2.397202253341675,
      "learning_rate": 8.50570707070707e-05,
      "loss": 1.8917,
      "step": 158000
    },
    {
      "epoch": 0.8153082482446439,
      "grad_norm": 2.5230774879455566,
      "learning_rate": 8.500656565656566e-05,
      "loss": 1.9009,
      "step": 158500
    },
    {
      "epoch": 0.817880198554564,
      "grad_norm": 2.8625664710998535,
      "learning_rate": 8.495606060606061e-05,
      "loss": 1.8902,
      "step": 159000
    },
    {
      "epoch": 0.8204521488644839,
      "grad_norm": 2.3342695236206055,
      "learning_rate": 8.490555555555557e-05,
      "loss": 1.8664,
      "step": 159500
    },
    {
      "epoch": 0.823024099174404,
      "grad_norm": 2.483473777770996,
      "learning_rate": 8.48550505050505e-05,
      "loss": 1.8791,
      "step": 160000
    },
    {
      "epoch": 0.825596049484324,
      "grad_norm": 2.270512342453003,
      "learning_rate": 8.480454545454546e-05,
      "loss": 1.8758,
      "step": 160500
    },
    {
      "epoch": 0.8281679997942439,
      "grad_norm": 2.4790780544281006,
      "learning_rate": 8.475404040404042e-05,
      "loss": 1.8816,
      "step": 161000
    },
    {
      "epoch": 0.830739950104164,
      "grad_norm": 2.4023377895355225,
      "learning_rate": 8.470363636363637e-05,
      "loss": 1.8783,
      "step": 161500
    },
    {
      "epoch": 0.833311900414084,
      "grad_norm": 2.6411328315734863,
      "learning_rate": 8.465313131313131e-05,
      "loss": 1.8691,
      "step": 162000
    },
    {
      "epoch": 0.8358838507240041,
      "grad_norm": 2.2638540267944336,
      "learning_rate": 8.460262626262627e-05,
      "loss": 1.8545,
      "step": 162500
    },
    {
      "epoch": 0.838455801033924,
      "grad_norm": 2.785778522491455,
      "learning_rate": 8.455212121212122e-05,
      "loss": 1.8755,
      "step": 163000
    },
    {
      "epoch": 0.841027751343844,
      "grad_norm": 2.2858121395111084,
      "learning_rate": 8.450171717171718e-05,
      "loss": 1.8659,
      "step": 163500
    },
    {
      "epoch": 0.8435997016537641,
      "grad_norm": 2.7761781215667725,
      "learning_rate": 8.445121212121212e-05,
      "loss": 1.8673,
      "step": 164000
    },
    {
      "epoch": 0.846171651963684,
      "grad_norm": 3.0068702697753906,
      "learning_rate": 8.440070707070707e-05,
      "loss": 1.8599,
      "step": 164500
    },
    {
      "epoch": 0.8487436022736041,
      "grad_norm": 2.3816988468170166,
      "learning_rate": 8.435020202020203e-05,
      "loss": 1.8687,
      "step": 165000
    },
    {
      "epoch": 0.8513155525835241,
      "grad_norm": 2.7806084156036377,
      "learning_rate": 8.429979797979798e-05,
      "loss": 1.8633,
      "step": 165500
    },
    {
      "epoch": 0.8538875028934441,
      "grad_norm": 2.572535753250122,
      "learning_rate": 8.424929292929294e-05,
      "loss": 1.8586,
      "step": 166000
    },
    {
      "epoch": 0.8564594532033641,
      "grad_norm": 2.6891589164733887,
      "learning_rate": 8.419878787878788e-05,
      "loss": 1.8829,
      "step": 166500
    },
    {
      "epoch": 0.8590314035132841,
      "grad_norm": 2.2894322872161865,
      "learning_rate": 8.414828282828283e-05,
      "loss": 1.8539,
      "step": 167000
    },
    {
      "epoch": 0.8616033538232042,
      "grad_norm": 2.343632459640503,
      "learning_rate": 8.409787878787879e-05,
      "loss": 1.8492,
      "step": 167500
    },
    {
      "epoch": 0.8641753041331242,
      "grad_norm": 2.1601314544677734,
      "learning_rate": 8.404737373737375e-05,
      "loss": 1.869,
      "step": 168000
    },
    {
      "epoch": 0.8667472544430441,
      "grad_norm": 2.3659918308258057,
      "learning_rate": 8.39968686868687e-05,
      "loss": 1.8586,
      "step": 168500
    },
    {
      "epoch": 0.8693192047529642,
      "grad_norm": 1.9559909105300903,
      "learning_rate": 8.394646464646465e-05,
      "loss": 1.8535,
      "step": 169000
    },
    {
      "epoch": 0.8718911550628842,
      "grad_norm": 2.3367204666137695,
      "learning_rate": 8.38959595959596e-05,
      "loss": 1.8438,
      "step": 169500
    },
    {
      "epoch": 0.8744631053728043,
      "grad_norm": 2.5470831394195557,
      "learning_rate": 8.384545454545455e-05,
      "loss": 1.8715,
      "step": 170000
    },
    {
      "epoch": 0.8770350556827242,
      "grad_norm": 1.9904810190200806,
      "learning_rate": 8.379494949494951e-05,
      "loss": 1.837,
      "step": 170500
    },
    {
      "epoch": 0.8796070059926442,
      "grad_norm": 2.808014392852783,
      "learning_rate": 8.374444444444445e-05,
      "loss": 1.8473,
      "step": 171000
    },
    {
      "epoch": 0.8821789563025643,
      "grad_norm": 2.3761932849884033,
      "learning_rate": 8.36939393939394e-05,
      "loss": 1.8492,
      "step": 171500
    },
    {
      "epoch": 0.8847509066124842,
      "grad_norm": 2.5445032119750977,
      "learning_rate": 8.364343434343435e-05,
      "loss": 1.8537,
      "step": 172000
    },
    {
      "epoch": 0.8873228569224043,
      "grad_norm": 2.6148016452789307,
      "learning_rate": 8.35929292929293e-05,
      "loss": 1.8507,
      "step": 172500
    },
    {
      "epoch": 0.8898948072323243,
      "grad_norm": 2.4389026165008545,
      "learning_rate": 8.354242424242424e-05,
      "loss": 1.8421,
      "step": 173000
    },
    {
      "epoch": 0.8924667575422442,
      "grad_norm": 2.1091599464416504,
      "learning_rate": 8.34920202020202e-05,
      "loss": 1.8543,
      "step": 173500
    },
    {
      "epoch": 0.8950387078521643,
      "grad_norm": 2.5214107036590576,
      "learning_rate": 8.344151515151516e-05,
      "loss": 1.8516,
      "step": 174000
    },
    {
      "epoch": 0.8976106581620843,
      "grad_norm": 2.6828722953796387,
      "learning_rate": 8.33910101010101e-05,
      "loss": 1.8537,
      "step": 174500
    },
    {
      "epoch": 0.9001826084720044,
      "grad_norm": 2.204803943634033,
      "learning_rate": 8.334050505050506e-05,
      "loss": 1.8668,
      "step": 175000
    },
    {
      "epoch": 0.9027545587819243,
      "grad_norm": 2.917100191116333,
      "learning_rate": 8.329e-05,
      "loss": 1.8423,
      "step": 175500
    },
    {
      "epoch": 0.9053265090918443,
      "grad_norm": 2.2125914096832275,
      "learning_rate": 8.323959595959596e-05,
      "loss": 1.8403,
      "step": 176000
    },
    {
      "epoch": 0.9078984594017644,
      "grad_norm": 2.3068203926086426,
      "learning_rate": 8.318909090909092e-05,
      "loss": 1.8499,
      "step": 176500
    },
    {
      "epoch": 0.9104704097116844,
      "grad_norm": 2.733078956604004,
      "learning_rate": 8.313868686868688e-05,
      "loss": 1.8387,
      "step": 177000
    },
    {
      "epoch": 0.9130423600216044,
      "grad_norm": 2.5091042518615723,
      "learning_rate": 8.308818181818182e-05,
      "loss": 1.8362,
      "step": 177500
    },
    {
      "epoch": 0.9156143103315244,
      "grad_norm": 2.4861273765563965,
      "learning_rate": 8.303767676767677e-05,
      "loss": 1.8394,
      "step": 178000
    },
    {
      "epoch": 0.9181862606414444,
      "grad_norm": 2.519242286682129,
      "learning_rate": 8.298717171717172e-05,
      "loss": 1.832,
      "step": 178500
    },
    {
      "epoch": 0.9207582109513645,
      "grad_norm": 2.075767993927002,
      "learning_rate": 8.293666666666668e-05,
      "loss": 1.8362,
      "step": 179000
    },
    {
      "epoch": 0.9233301612612844,
      "grad_norm": 2.563034772872925,
      "learning_rate": 8.288616161616162e-05,
      "loss": 1.8355,
      "step": 179500
    },
    {
      "epoch": 0.9259021115712044,
      "grad_norm": 2.5027518272399902,
      "learning_rate": 8.283565656565657e-05,
      "loss": 1.8337,
      "step": 180000
    },
    {
      "epoch": 0.9284740618811245,
      "grad_norm": 2.341482162475586,
      "learning_rate": 8.278525252525253e-05,
      "loss": 1.8452,
      "step": 180500
    },
    {
      "epoch": 0.9310460121910444,
      "grad_norm": 2.5052967071533203,
      "learning_rate": 8.273474747474747e-05,
      "loss": 1.8337,
      "step": 181000
    },
    {
      "epoch": 0.9336179625009645,
      "grad_norm": 2.9151535034179688,
      "learning_rate": 8.268424242424243e-05,
      "loss": 1.8323,
      "step": 181500
    },
    {
      "epoch": 0.9361899128108845,
      "grad_norm": 2.3366811275482178,
      "learning_rate": 8.263383838383839e-05,
      "loss": 1.8286,
      "step": 182000
    },
    {
      "epoch": 0.9387618631208045,
      "grad_norm": 2.044461727142334,
      "learning_rate": 8.258333333333334e-05,
      "loss": 1.8345,
      "step": 182500
    },
    {
      "epoch": 0.9413338134307245,
      "grad_norm": 2.488086223602295,
      "learning_rate": 8.253282828282829e-05,
      "loss": 1.8349,
      "step": 183000
    },
    {
|
"epoch": 0.9439057637406445, |
|
"grad_norm": 2.246419906616211, |
|
"learning_rate": 8.248232323232323e-05, |
|
"loss": 1.824, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 0.9464777140505646, |
|
"grad_norm": 2.0991148948669434, |
|
"learning_rate": 8.243181818181819e-05, |
|
"loss": 1.8322, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 0.9490496643604845, |
|
"grad_norm": 2.6441781520843506, |
|
"learning_rate": 8.238131313131312e-05, |
|
"loss": 1.8341, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 0.9516216146704045, |
|
"grad_norm": 2.344884157180786, |
|
"learning_rate": 8.233080808080808e-05, |
|
"loss": 1.8177, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 0.9541935649803246, |
|
"grad_norm": 2.5357608795166016, |
|
"learning_rate": 8.228030303030303e-05, |
|
"loss": 1.8263, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 0.9567655152902446, |
|
"grad_norm": 2.7352442741394043, |
|
"learning_rate": 8.222979797979799e-05, |
|
"loss": 1.8293, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 0.9593374656001646, |
|
"grad_norm": 2.9389710426330566, |
|
"learning_rate": 8.217929292929292e-05, |
|
"loss": 1.8141, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 0.9619094159100846, |
|
"grad_norm": 2.38529634475708, |
|
"learning_rate": 8.212878787878788e-05, |
|
"loss": 1.813, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 0.9644813662200046, |
|
"grad_norm": 2.8772764205932617, |
|
"learning_rate": 8.207838383838384e-05, |
|
"loss": 1.8231, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 0.9670533165299247, |
|
"grad_norm": 2.1025900840759277, |
|
"learning_rate": 8.202787878787879e-05, |
|
"loss": 1.8218, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 0.9696252668398446, |
|
"grad_norm": 2.149860382080078, |
|
"learning_rate": 8.197737373737374e-05, |
|
"loss": 1.8163, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 0.9721972171497647, |
|
"grad_norm": 2.2093310356140137, |
|
"learning_rate": 8.19268686868687e-05, |
|
"loss": 1.8222, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 0.9747691674596847, |
|
"grad_norm": 2.126584053039551, |
|
"learning_rate": 8.187636363636364e-05, |
|
"loss": 1.8139, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 0.9773411177696046, |
|
"grad_norm": 2.6543593406677246, |
|
"learning_rate": 8.182585858585859e-05, |
|
"loss": 1.8258, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 0.9799130680795247, |
|
"grad_norm": 3.2399909496307373, |
|
"learning_rate": 8.177535353535353e-05, |
|
"loss": 1.8066, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 0.9824850183894447, |
|
"grad_norm": 2.757171392440796, |
|
"learning_rate": 8.17249494949495e-05, |
|
"loss": 1.8082, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 0.9850569686993648, |
|
"grad_norm": 2.164072036743164, |
|
"learning_rate": 8.167444444444445e-05, |
|
"loss": 1.8214, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 0.9876289190092847, |
|
"grad_norm": 2.501775026321411, |
|
"learning_rate": 8.16239393939394e-05, |
|
"loss": 1.813, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.9902008693192047, |
|
"grad_norm": 2.7152421474456787, |
|
"learning_rate": 8.157343434343435e-05, |
|
"loss": 1.8174, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 0.9927728196291248, |
|
"grad_norm": 2.667201519012451, |
|
"learning_rate": 8.15229292929293e-05, |
|
"loss": 1.8253, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 0.9953447699390447, |
|
"grad_norm": 2.656597375869751, |
|
"learning_rate": 8.147242424242425e-05, |
|
"loss": 1.8091, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 0.9979167202489648, |
|
"grad_norm": 2.635948896408081, |
|
"learning_rate": 8.14219191919192e-05, |
|
"loss": 1.8127, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 1.000488670558885, |
|
"grad_norm": 2.38082218170166, |
|
"learning_rate": 8.137141414141415e-05, |
|
"loss": 1.8222, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 1.0030606208688049, |
|
"grad_norm": 3.0616064071655273, |
|
"learning_rate": 8.132090909090909e-05, |
|
"loss": 1.8212, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 1.0056325711787248, |
|
"grad_norm": 2.3557846546173096, |
|
"learning_rate": 8.127060606060607e-05, |
|
"loss": 1.8092, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 1.0082045214886448, |
|
"grad_norm": 2.4398655891418457, |
|
"learning_rate": 8.122010101010101e-05, |
|
"loss": 1.8157, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 1.0107764717985648, |
|
"grad_norm": 2.373342275619507, |
|
"learning_rate": 8.116959595959597e-05, |
|
"loss": 1.811, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 1.013348422108485, |
|
"grad_norm": 2.491063356399536, |
|
"learning_rate": 8.111909090909092e-05, |
|
"loss": 1.8079, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 1.015920372418405, |
|
"grad_norm": 2.996239185333252, |
|
"learning_rate": 8.106858585858586e-05, |
|
"loss": 1.8104, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 1.018492322728325, |
|
"grad_norm": 2.259913921356201, |
|
"learning_rate": 8.101818181818182e-05, |
|
"loss": 1.8086, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 1.0210642730382449, |
|
"grad_norm": 2.3475708961486816, |
|
"learning_rate": 8.096767676767677e-05, |
|
"loss": 1.8044, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 1.0236362233481648, |
|
"grad_norm": 1.893655776977539, |
|
"learning_rate": 8.091717171717173e-05, |
|
"loss": 1.8083, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 1.026208173658085, |
|
"grad_norm": 2.151472806930542, |
|
"learning_rate": 8.086666666666666e-05, |
|
"loss": 1.8026, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 1.028780123968005, |
|
"grad_norm": 2.5114681720733643, |
|
"learning_rate": 8.081616161616162e-05, |
|
"loss": 1.7933, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 1.031352074277925, |
|
"grad_norm": 2.255035400390625, |
|
"learning_rate": 8.076565656565657e-05, |
|
"loss": 1.8041, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 1.033924024587845, |
|
"grad_norm": 2.479146957397461, |
|
"learning_rate": 8.071525252525253e-05, |
|
"loss": 1.7984, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 1.036495974897765, |
|
"grad_norm": 2.6387994289398193, |
|
"learning_rate": 8.066474747474749e-05, |
|
"loss": 1.8026, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 1.039067925207685, |
|
"grad_norm": 2.15395188331604, |
|
"learning_rate": 8.061424242424242e-05, |
|
"loss": 1.8088, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 1.041639875517605, |
|
"grad_norm": 2.761543035507202, |
|
"learning_rate": 8.056373737373738e-05, |
|
"loss": 1.8023, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 1.044211825827525, |
|
"grad_norm": 2.5639731884002686, |
|
"learning_rate": 8.051333333333334e-05, |
|
"loss": 1.8009, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 1.046783776137445, |
|
"grad_norm": 2.1359119415283203, |
|
"learning_rate": 8.046282828282829e-05, |
|
"loss": 1.8206, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 1.049355726447365, |
|
"grad_norm": 2.0918943881988525, |
|
"learning_rate": 8.041232323232323e-05, |
|
"loss": 1.7956, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 1.051927676757285, |
|
"grad_norm": 2.1521031856536865, |
|
"learning_rate": 8.036181818181818e-05, |
|
"loss": 1.8062, |
|
"step": 204500 |
|
}, |
|
{ |
|
"epoch": 1.0544996270672051, |
|
"grad_norm": 2.2172553539276123, |
|
"learning_rate": 8.031131313131314e-05, |
|
"loss": 1.7936, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 1.057071577377125, |
|
"grad_norm": 3.1185765266418457, |
|
"learning_rate": 8.026080808080809e-05, |
|
"loss": 1.7966, |
|
"step": 205500 |
|
}, |
|
{ |
|
"epoch": 1.059643527687045, |
|
"grad_norm": 2.084747314453125, |
|
"learning_rate": 8.021030303030303e-05, |
|
"loss": 1.7851, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 1.062215477996965, |
|
"grad_norm": 2.4494941234588623, |
|
"learning_rate": 8.015979797979798e-05, |
|
"loss": 1.7943, |
|
"step": 206500 |
|
}, |
|
{ |
|
"epoch": 1.064787428306885, |
|
"grad_norm": 2.62510347366333, |
|
"learning_rate": 8.010929292929294e-05, |
|
"loss": 1.7931, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 1.0673593786168052, |
|
"grad_norm": 2.6288397312164307, |
|
"learning_rate": 8.00588888888889e-05, |
|
"loss": 1.7964, |
|
"step": 207500 |
|
}, |
|
{ |
|
"epoch": 1.0699313289267252, |
|
"grad_norm": 2.5375521183013916, |
|
"learning_rate": 8.000838383838384e-05, |
|
"loss": 1.8035, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 1.0725032792366451, |
|
"grad_norm": 2.3402857780456543, |
|
"learning_rate": 7.995787878787879e-05, |
|
"loss": 1.7847, |
|
"step": 208500 |
|
}, |
|
{ |
|
"epoch": 1.075075229546565, |
|
"grad_norm": 2.824528455734253, |
|
"learning_rate": 7.990737373737374e-05, |
|
"loss": 1.7905, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 1.077647179856485, |
|
"grad_norm": 2.478386878967285, |
|
"learning_rate": 7.98568686868687e-05, |
|
"loss": 1.7894, |
|
"step": 209500 |
|
}, |
|
{ |
|
"epoch": 1.0802191301664053, |
|
"grad_norm": 2.576979398727417, |
|
"learning_rate": 7.980636363636363e-05, |
|
"loss": 1.7866, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 1.0827910804763252, |
|
"grad_norm": 2.5241525173187256, |
|
"learning_rate": 7.975585858585859e-05, |
|
"loss": 1.7895, |
|
"step": 210500 |
|
}, |
|
{ |
|
"epoch": 1.0853630307862452, |
|
"grad_norm": 2.5618913173675537, |
|
"learning_rate": 7.970535353535355e-05, |
|
"loss": 1.7836, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 1.0879349810961652, |
|
"grad_norm": 2.0089547634124756, |
|
"learning_rate": 7.96549494949495e-05, |
|
"loss": 1.7974, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 1.0905069314060851, |
|
"grad_norm": 2.360208034515381, |
|
"learning_rate": 7.960444444444444e-05, |
|
"loss": 1.7778, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 1.0930788817160053, |
|
"grad_norm": 2.1004722118377686, |
|
"learning_rate": 7.95539393939394e-05, |
|
"loss": 1.7774, |
|
"step": 212500 |
|
}, |
|
{ |
|
"epoch": 1.0956508320259253, |
|
"grad_norm": 2.2082858085632324, |
|
"learning_rate": 7.950353535353535e-05, |
|
"loss": 1.7809, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 1.0982227823358452, |
|
"grad_norm": 2.4933605194091797, |
|
"learning_rate": 7.945303030303031e-05, |
|
"loss": 1.7884, |
|
"step": 213500 |
|
}, |
|
{ |
|
"epoch": 1.1007947326457652, |
|
"grad_norm": 2.1621594429016113, |
|
"learning_rate": 7.940252525252527e-05, |
|
"loss": 1.7787, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 1.1033666829556852, |
|
"grad_norm": 2.569934368133545, |
|
"learning_rate": 7.93520202020202e-05, |
|
"loss": 1.7805, |
|
"step": 214500 |
|
}, |
|
{ |
|
"epoch": 1.1059386332656054, |
|
"grad_norm": 2.512706756591797, |
|
"learning_rate": 7.930151515151516e-05, |
|
"loss": 1.7897, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 1.1085105835755253, |
|
"grad_norm": 2.0574967861175537, |
|
"learning_rate": 7.92510101010101e-05, |
|
"loss": 1.7697, |
|
"step": 215500 |
|
}, |
|
{ |
|
"epoch": 1.1110825338854453, |
|
"grad_norm": 2.4195003509521484, |
|
"learning_rate": 7.920060606060607e-05, |
|
"loss": 1.7765, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 1.1136544841953653, |
|
"grad_norm": 2.6895534992218018, |
|
"learning_rate": 7.915010101010101e-05, |
|
"loss": 1.7842, |
|
"step": 216500 |
|
}, |
|
{ |
|
"epoch": 1.1162264345052852, |
|
"grad_norm": 2.3295652866363525, |
|
"learning_rate": 7.909959595959596e-05, |
|
"loss": 1.7801, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 1.1187983848152054, |
|
"grad_norm": 2.4626710414886475, |
|
"learning_rate": 7.904909090909092e-05, |
|
"loss": 1.7863, |
|
"step": 217500 |
|
}, |
|
{ |
|
"epoch": 1.1213703351251254, |
|
"grad_norm": 2.438185214996338, |
|
"learning_rate": 7.899858585858587e-05, |
|
"loss": 1.7744, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 1.1239422854350454, |
|
"grad_norm": 2.2876017093658447, |
|
"learning_rate": 7.894808080808081e-05, |
|
"loss": 1.7816, |
|
"step": 218500 |
|
}, |
|
{ |
|
"epoch": 1.1265142357449653, |
|
"grad_norm": 2.7953882217407227, |
|
"learning_rate": 7.889757575757576e-05, |
|
"loss": 1.7838, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 1.1290861860548853, |
|
"grad_norm": 2.5806899070739746, |
|
"learning_rate": 7.884717171717172e-05, |
|
"loss": 1.7777, |
|
"step": 219500 |
|
}, |
|
{ |
|
"epoch": 1.1316581363648055, |
|
"grad_norm": 2.28183650970459, |
|
"learning_rate": 7.879666666666668e-05, |
|
"loss": 1.7922, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 1.1342300866747255, |
|
"grad_norm": 2.3127825260162354, |
|
"learning_rate": 7.874616161616162e-05, |
|
"loss": 1.7663, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 1.1368020369846454, |
|
"grad_norm": 2.4055662155151367, |
|
"learning_rate": 7.869565656565657e-05, |
|
"loss": 1.7769, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 1.1393739872945654, |
|
"grad_norm": 2.1033191680908203, |
|
"learning_rate": 7.864515151515152e-05, |
|
"loss": 1.7832, |
|
"step": 221500 |
|
}, |
|
{ |
|
"epoch": 1.1419459376044854, |
|
"grad_norm": 2.047595500946045, |
|
"learning_rate": 7.859474747474748e-05, |
|
"loss": 1.7693, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 1.1445178879144056, |
|
"grad_norm": 2.706106424331665, |
|
"learning_rate": 7.854424242424244e-05, |
|
"loss": 1.7778, |
|
"step": 222500 |
|
}, |
|
{ |
|
"epoch": 1.1470898382243255, |
|
"grad_norm": 2.076641798019409, |
|
"learning_rate": 7.849373737373737e-05, |
|
"loss": 1.7678, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 1.1496617885342455, |
|
"grad_norm": 2.578556537628174, |
|
"learning_rate": 7.844323232323233e-05, |
|
"loss": 1.7795, |
|
"step": 223500 |
|
}, |
|
{ |
|
"epoch": 1.1522337388441655, |
|
"grad_norm": 2.0416908264160156, |
|
"learning_rate": 7.839272727272727e-05, |
|
"loss": 1.7665, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 1.1548056891540854, |
|
"grad_norm": 2.5179026126861572, |
|
"learning_rate": 7.834232323232323e-05, |
|
"loss": 1.7608, |
|
"step": 224500 |
|
}, |
|
{ |
|
"epoch": 1.1573776394640056, |
|
"grad_norm": 2.2774341106414795, |
|
"learning_rate": 7.82918181818182e-05, |
|
"loss": 1.7567, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 1.1599495897739256, |
|
"grad_norm": 2.177483558654785, |
|
"learning_rate": 7.824131313131313e-05, |
|
"loss": 1.7672, |
|
"step": 225500 |
|
}, |
|
{ |
|
"epoch": 1.1625215400838456, |
|
"grad_norm": 2.516448736190796, |
|
"learning_rate": 7.819080808080809e-05, |
|
"loss": 1.7576, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 1.1650934903937655, |
|
"grad_norm": 2.2014214992523193, |
|
"learning_rate": 7.814030303030303e-05, |
|
"loss": 1.7662, |
|
"step": 226500 |
|
}, |
|
{ |
|
"epoch": 1.1676654407036855, |
|
"grad_norm": 2.2554168701171875, |
|
"learning_rate": 7.808979797979798e-05, |
|
"loss": 1.7719, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 1.1702373910136057, |
|
"grad_norm": 2.5222623348236084, |
|
"learning_rate": 7.803939393939394e-05, |
|
"loss": 1.777, |
|
"step": 227500 |
|
}, |
|
{ |
|
"epoch": 1.1728093413235257, |
|
"grad_norm": 2.1105360984802246, |
|
"learning_rate": 7.798888888888889e-05, |
|
"loss": 1.7654, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 1.1753812916334456, |
|
"grad_norm": 2.4991660118103027, |
|
"learning_rate": 7.793838383838385e-05, |
|
"loss": 1.7622, |
|
"step": 228500 |
|
}, |
|
{ |
|
"epoch": 1.1779532419433656, |
|
"grad_norm": 2.394397258758545, |
|
"learning_rate": 7.788787878787879e-05, |
|
"loss": 1.763, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 1.1805251922532856, |
|
"grad_norm": 2.5834200382232666, |
|
"learning_rate": 7.783737373737374e-05, |
|
"loss": 1.7636, |
|
"step": 229500 |
|
}, |
|
{ |
|
"epoch": 1.1830971425632058, |
|
"grad_norm": 2.1750988960266113, |
|
"learning_rate": 7.778686868686868e-05, |
|
"loss": 1.7712, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 1.1856690928731257, |
|
"grad_norm": 2.460362195968628, |
|
"learning_rate": 7.773636363636364e-05, |
|
"loss": 1.7695, |
|
"step": 230500 |
|
}, |
|
{ |
|
"epoch": 1.1882410431830457, |
|
"grad_norm": 2.492896795272827, |
|
"learning_rate": 7.768585858585858e-05, |
|
"loss": 1.7628, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 1.1908129934929657, |
|
"grad_norm": 2.5049636363983154, |
|
"learning_rate": 7.763545454545455e-05, |
|
"loss": 1.7595, |
|
"step": 231500 |
|
}, |
|
{ |
|
"epoch": 1.1933849438028856, |
|
"grad_norm": 2.638702630996704, |
|
"learning_rate": 7.75849494949495e-05, |
|
"loss": 1.7716, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 1.1959568941128058, |
|
"grad_norm": 2.3910155296325684, |
|
"learning_rate": 7.753444444444444e-05, |
|
"loss": 1.7682, |
|
"step": 232500 |
|
}, |
|
{ |
|
"epoch": 1.1985288444227258, |
|
"grad_norm": 2.247044563293457, |
|
"learning_rate": 7.74840404040404e-05, |
|
"loss": 1.7625, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 1.2011007947326457, |
|
"grad_norm": 2.289677858352661, |
|
"learning_rate": 7.743353535353536e-05, |
|
"loss": 1.7632, |
|
"step": 233500 |
|
}, |
|
{ |
|
"epoch": 1.2036727450425657, |
|
"grad_norm": 2.5424296855926514, |
|
"learning_rate": 7.73830303030303e-05, |
|
"loss": 1.7672, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 1.2062446953524857, |
|
"grad_norm": 2.1238250732421875, |
|
"learning_rate": 7.733252525252526e-05, |
|
"loss": 1.7547, |
|
"step": 234500 |
|
}, |
|
{ |
|
"epoch": 1.2088166456624059, |
|
"grad_norm": 2.2579052448272705, |
|
"learning_rate": 7.728202020202022e-05, |
|
"loss": 1.7606, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 1.2113885959723258, |
|
"grad_norm": 2.3846943378448486, |
|
"learning_rate": 7.723151515151515e-05, |
|
"loss": 1.744, |
|
"step": 235500 |
|
}, |
|
{ |
|
"epoch": 1.2139605462822458, |
|
"grad_norm": 2.23209547996521, |
|
"learning_rate": 7.718101010101011e-05, |
|
"loss": 1.7643, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 1.2165324965921658, |
|
"grad_norm": 2.6672916412353516, |
|
"learning_rate": 7.713050505050505e-05, |
|
"loss": 1.7561, |
|
"step": 236500 |
|
}, |
|
{ |
|
"epoch": 1.2191044469020857, |
|
"grad_norm": 2.5802114009857178, |
|
"learning_rate": 7.708010101010101e-05, |
|
"loss": 1.7613, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 1.221676397212006, |
|
"grad_norm": 2.311035633087158, |
|
"learning_rate": 7.702959595959597e-05, |
|
"loss": 1.7536, |
|
"step": 237500 |
|
}, |
|
{ |
|
"epoch": 1.224248347521926, |
|
"grad_norm": 2.2888970375061035, |
|
"learning_rate": 7.697919191919192e-05, |
|
"loss": 1.7454, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 1.2268202978318459, |
|
"grad_norm": 2.203408718109131, |
|
"learning_rate": 7.692868686868687e-05, |
|
"loss": 1.7496, |
|
"step": 238500 |
|
}, |
|
{ |
|
"epoch": 1.2293922481417658, |
|
"grad_norm": 2.1793553829193115, |
|
"learning_rate": 7.687818181818183e-05, |
|
"loss": 1.7681, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 1.2319641984516858, |
|
"grad_norm": 2.3608551025390625, |
|
"learning_rate": 7.682767676767677e-05, |
|
"loss": 1.7521, |
|
"step": 239500 |
|
}, |
|
{ |
|
"epoch": 1.234536148761606, |
|
"grad_norm": 2.602651834487915, |
|
"learning_rate": 7.677717171717172e-05, |
|
"loss": 1.7689, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 1.237108099071526, |
|
"grad_norm": 2.261465311050415, |
|
"learning_rate": 7.672666666666667e-05, |
|
"loss": 1.7514, |
|
"step": 240500 |
|
}, |
|
{ |
|
"epoch": 1.239680049381446, |
|
"grad_norm": 2.375920057296753, |
|
"learning_rate": 7.667616161616162e-05, |
|
"loss": 1.7579, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 1.242251999691366, |
|
"grad_norm": 2.47737979888916, |
|
"learning_rate": 7.662575757575758e-05, |
|
"loss": 1.7576, |
|
"step": 241500 |
|
}, |
|
{ |
|
"epoch": 1.2448239500012859, |
|
"grad_norm": 2.7517123222351074, |
|
"learning_rate": 7.657525252525253e-05, |
|
"loss": 1.7527, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 1.247395900311206, |
|
"grad_norm": 2.765855073928833, |
|
"learning_rate": 7.652474747474748e-05, |
|
"loss": 1.7442, |
|
"step": 242500 |
|
}, |
|
{ |
|
"epoch": 1.249967850621126, |
|
"grad_norm": 2.3727500438690186, |
|
"learning_rate": 7.647424242424242e-05, |
|
"loss": 1.7513, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 1.252539800931046, |
|
"grad_norm": 2.3826792240142822, |
|
"learning_rate": 7.642373737373738e-05, |
|
"loss": 1.7539, |
|
"step": 243500 |
|
}, |
|
{ |
|
"epoch": 1.255111751240966, |
|
"grad_norm": 2.1369845867156982, |
|
"learning_rate": 7.637323232323233e-05, |
|
"loss": 1.7457, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 1.257683701550886, |
|
"grad_norm": 2.8363897800445557, |
|
"learning_rate": 7.632272727272728e-05, |
|
"loss": 1.7489, |
|
"step": 244500 |
|
}, |
|
{ |
|
"epoch": 1.2602556518608061, |
|
"grad_norm": 2.043923854827881, |
|
"learning_rate": 7.627232323232324e-05, |
|
"loss": 1.7399, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 1.262827602170726, |
|
"grad_norm": 2.7618696689605713, |
|
"learning_rate": 7.622181818181818e-05, |
|
"loss": 1.7444, |
|
"step": 245500 |
|
}, |
|
{ |
|
"epoch": 1.265399552480646, |
|
"grad_norm": 2.689225435256958, |
|
"learning_rate": 7.617131313131314e-05, |
|
"loss": 1.7489, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 1.267971502790566, |
|
"grad_norm": 2.448422908782959, |
|
"learning_rate": 7.612080808080807e-05, |
|
"loss": 1.7428, |
|
"step": 246500 |
|
}, |
|
{ |
|
"epoch": 1.270543453100486, |
|
"grad_norm": 2.5466957092285156, |
|
"learning_rate": 7.607030303030303e-05, |
|
"loss": 1.7463, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 1.2731154034104062, |
|
"grad_norm": 2.244110107421875, |
|
"learning_rate": 7.6019898989899e-05, |
|
"loss": 1.7467, |
|
"step": 247500 |
|
}, |
|
{ |
|
"epoch": 1.2756873537203262, |
|
"grad_norm": 2.1423609256744385, |
|
"learning_rate": 7.596939393939394e-05, |
|
"loss": 1.7502, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 1.2782593040302461, |
|
"grad_norm": 2.408640146255493, |
|
"learning_rate": 7.59188888888889e-05, |
|
"loss": 1.7405, |
|
"step": 248500 |
|
}, |
|
{ |
|
"epoch": 1.280831254340166, |
|
"grad_norm": 2.5381617546081543, |
|
"learning_rate": 7.586838383838383e-05, |
|
"loss": 1.7383, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 1.283403204650086, |
|
"grad_norm": 2.206977128982544, |
|
"learning_rate": 7.581787878787879e-05, |
|
"loss": 1.7377, |
|
"step": 249500 |
|
}, |
|
{ |
|
"epoch": 1.2859751549600063, |
|
"grad_norm": 2.2149858474731445, |
|
"learning_rate": 7.576737373737374e-05, |
|
"loss": 1.7362, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 1.2885471052699262, |
|
"grad_norm": 2.614354372024536, |
|
"learning_rate": 7.571686868686869e-05, |
|
"loss": 1.7517, |
|
"step": 250500 |
|
}, |
|
{ |
|
"epoch": 1.2911190555798462, |
|
"grad_norm": 2.1546077728271484, |
|
"learning_rate": 7.566646464646465e-05, |
|
"loss": 1.7281, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 1.2936910058897662, |
|
"grad_norm": 2.150606632232666, |
|
"learning_rate": 7.561595959595959e-05, |
|
"loss": 1.7525, |
|
"step": 251500 |
|
}, |
|
{ |
|
"epoch": 1.2962629561996861, |
|
"grad_norm": 2.4622044563293457, |
|
"learning_rate": 7.556545454545455e-05, |
|
"loss": 1.7407, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 1.2988349065096063, |
|
"grad_norm": 2.383789300918579, |
|
"learning_rate": 7.55149494949495e-05, |
|
"loss": 1.7401, |
|
"step": 252500 |
|
}, |
|
{ |
|
"epoch": 1.3014068568195263, |
|
"grad_norm": 2.7778983116149902, |
|
"learning_rate": 7.546454545454546e-05, |
|
"loss": 1.7298, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 1.3039788071294462, |
|
"grad_norm": 2.69973087310791, |
|
"learning_rate": 7.54140404040404e-05, |
|
"loss": 1.7298, |
|
"step": 253500 |
|
}, |
|
{ |
|
"epoch": 1.3065507574393662, |
|
"grad_norm": 2.866455554962158, |
|
"learning_rate": 7.536353535353535e-05, |
|
"loss": 1.7421, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 1.3091227077492862, |
|
"grad_norm": 2.307335615158081, |
|
"learning_rate": 7.531303030303031e-05, |
|
"loss": 1.7427, |
|
"step": 254500 |
|
}, |
|
{ |
|
"epoch": 1.3116946580592064, |
|
"grad_norm": 2.242201089859009, |
|
"learning_rate": 7.526252525252526e-05, |
|
"loss": 1.7406, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 1.3142666083691263, |
|
"grad_norm": 2.3447513580322266, |
|
"learning_rate": 7.52120202020202e-05, |
|
"loss": 1.7219, |
|
"step": 255500 |
|
}, |
|
{ |
|
"epoch": 1.3168385586790463, |
|
"grad_norm": 2.4869656562805176, |
|
"learning_rate": 7.516151515151516e-05, |
|
"loss": 1.7247, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 1.3194105089889663, |
|
"grad_norm": 3.0479238033294678, |
|
"learning_rate": 7.511101010101011e-05, |
|
"loss": 1.7387, |
|
"step": 256500 |
|
}, |
|
{ |
|
"epoch": 1.3219824592988862, |
|
"grad_norm": 2.106835126876831, |
|
"learning_rate": 7.506060606060607e-05, |
|
"loss": 1.7436, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 1.3245544096088064, |
|
"grad_norm": 2.6086888313293457, |
|
"learning_rate": 7.5010101010101e-05, |
|
"loss": 1.7299, |
|
"step": 257500 |
|
}, |
|
{ |
|
"epoch": 1.3271263599187264, |
|
"grad_norm": 2.5068061351776123, |
|
"learning_rate": 7.495959595959596e-05, |
|
"loss": 1.727, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 1.3296983102286464, |
|
"grad_norm": 2.0098962783813477, |
|
"learning_rate": 7.490909090909092e-05, |
|
"loss": 1.7233, |
|
"step": 258500 |
|
}, |
|
{ |
|
"epoch": 1.3322702605385663, |
|
"grad_norm": 2.0728952884674072, |
|
"learning_rate": 7.485858585858587e-05, |
|
"loss": 1.7053, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 1.3348422108484863, |
|
"grad_norm": 2.0596702098846436, |
|
"learning_rate": 7.480808080808081e-05, |
|
"loss": 1.7309, |
|
"step": 259500 |
|
}, |
|
{ |
|
"epoch": 1.3374141611584065, |
|
"grad_norm": 2.2352986335754395, |
|
"learning_rate": 7.475757575757576e-05, |
|
"loss": 1.7363, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 1.3399861114683265, |
|
"grad_norm": 2.318910598754883, |
|
"learning_rate": 7.470707070707072e-05, |
|
"loss": 1.7329, |
|
"step": 260500 |
|
}, |
|
{ |
|
"epoch": 1.3425580617782464, |
|
"grad_norm": 2.536661148071289, |
|
"learning_rate": 7.465666666666668e-05, |
|
"loss": 1.7263, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 1.3451300120881664, |
|
"grad_norm": 2.216972827911377, |
|
"learning_rate": 7.460616161616161e-05, |
|
"loss": 1.7328, |
|
"step": 261500 |
|
}, |
|
{ |
|
"epoch": 1.3477019623980864, |
|
"grad_norm": 2.4291155338287354, |
|
"learning_rate": 7.455565656565657e-05, |
|
"loss": 1.7299, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 1.3502739127080066, |
|
"grad_norm": 2.5120067596435547, |
|
"learning_rate": 7.450515151515152e-05, |
|
"loss": 1.7398, |
|
"step": 262500 |
|
}, |
|
{ |
|
"epoch": 1.3528458630179265, |
|
"grad_norm": 2.61008358001709, |
|
"learning_rate": 7.445474747474748e-05, |
|
"loss": 1.7333, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 1.3554178133278465, |
|
"grad_norm": 2.112347364425659, |
|
"learning_rate": 7.440424242424244e-05, |
|
"loss": 1.7215, |
|
"step": 263500 |
|
}, |
|
{ |
|
"epoch": 1.3579897636377665, |
|
"grad_norm": 2.860222339630127, |
|
"learning_rate": 7.435373737373737e-05, |
|
"loss": 1.727, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 1.3605617139476864, |
|
"grad_norm": 2.319789171218872, |
|
"learning_rate": 7.430323232323233e-05, |
|
"loss": 1.7278, |
|
"step": 264500 |
|
}, |
|
{ |
|
"epoch": 1.3631336642576066, |
|
"grad_norm": 2.808403253555298, |
|
"learning_rate": 7.425282828282829e-05, |
|
"loss": 1.7404, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 1.3657056145675266, |
|
"grad_norm": 2.207468271255493, |
|
"learning_rate": 7.420232323232324e-05, |
|
"loss": 1.7247, |
|
"step": 265500 |
|
}, |
|
{ |
|
"epoch": 1.3682775648774466, |
|
"grad_norm": 3.101154327392578, |
|
"learning_rate": 7.415181818181818e-05, |
|
"loss": 1.7326, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 1.3708495151873665, |
|
"grad_norm": 2.5844483375549316, |
|
"learning_rate": 7.410131313131313e-05, |
|
"loss": 1.7153, |
|
"step": 266500 |
|
}, |
|
{ |
|
"epoch": 1.3734214654972865, |
|
"grad_norm": 2.1961023807525635, |
|
"learning_rate": 7.405090909090909e-05, |
|
"loss": 1.7174, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 1.3759934158072067, |
|
"grad_norm": 2.372945785522461, |
|
"learning_rate": 7.400050505050505e-05, |
|
"loss": 1.728, |
|
"step": 267500 |
|
}, |
|
{ |
|
"epoch": 1.3785653661171267, |
|
"grad_norm": 2.262930154800415, |
|
"learning_rate": 7.395000000000001e-05, |
|
"loss": 1.7088, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 1.3811373164270466, |
|
"grad_norm": 2.2142205238342285, |
|
"learning_rate": 7.389949494949495e-05, |
|
"loss": 1.7111, |
|
"step": 268500 |
|
}, |
|
{ |
|
"epoch": 1.3837092667369666, |
|
"grad_norm": 3.059236526489258, |
|
"learning_rate": 7.38489898989899e-05, |
|
"loss": 1.7179, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 1.3862812170468866, |
|
"grad_norm": 2.1427500247955322, |
|
"learning_rate": 7.379848484848485e-05, |
|
"loss": 1.722, |
|
"step": 269500 |
|
}, |
|
{ |
|
"epoch": 1.3888531673568068, |
|
"grad_norm": 2.4149832725524902, |
|
"learning_rate": 7.374808080808081e-05, |
|
"loss": 1.7259, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 1.3914251176667267, |
|
"grad_norm": 2.1872212886810303, |
|
"learning_rate": 7.369757575757577e-05, |
|
"loss": 1.7188, |
|
"step": 270500 |
|
}, |
|
{ |
|
"epoch": 1.3939970679766467, |
|
"grad_norm": 2.333991289138794, |
|
"learning_rate": 7.364707070707071e-05, |
|
"loss": 1.7222, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 1.3965690182865667, |
|
"grad_norm": 2.5313849449157715, |
|
"learning_rate": 7.359656565656566e-05, |
|
"loss": 1.7184, |
|
"step": 271500 |
|
}, |
|
{ |
|
"epoch": 1.3991409685964866, |
|
"grad_norm": 2.467475175857544, |
|
"learning_rate": 7.35460606060606e-05, |
|
"loss": 1.7288, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 1.4017129189064068, |
|
"grad_norm": 2.3604865074157715, |
|
"learning_rate": 7.349555555555557e-05, |
|
"loss": 1.7194, |
|
"step": 272500 |
|
}, |
|
{ |
|
"epoch": 1.4042848692163268, |
|
"grad_norm": 2.3482818603515625, |
|
"learning_rate": 7.34450505050505e-05, |
|
"loss": 1.7148, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 1.4068568195262467, |
|
"grad_norm": 2.384766101837158, |
|
"learning_rate": 7.339454545454546e-05, |
|
"loss": 1.7046, |
|
"step": 273500 |
|
}, |
|
{ |
|
"epoch": 1.4094287698361667, |
|
"grad_norm": 2.6986968517303467, |
|
"learning_rate": 7.334414141414142e-05, |
|
"loss": 1.7137, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 1.4120007201460867, |
|
"grad_norm": 2.383161783218384, |
|
"learning_rate": 7.329373737373738e-05, |
|
"loss": 1.7206, |
|
"step": 274500 |
|
}, |
|
{ |
|
"epoch": 1.4145726704560069, |
|
"grad_norm": 2.5386579036712646, |
|
"learning_rate": 7.324323232323232e-05, |
|
"loss": 1.7127, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 1.4171446207659268, |
|
"grad_norm": 2.8972415924072266, |
|
"learning_rate": 7.319272727272728e-05, |
|
"loss": 1.7088, |
|
"step": 275500 |
|
}, |
|
{ |
|
"epoch": 1.4197165710758468, |
|
"grad_norm": 2.8067967891693115, |
|
"learning_rate": 7.314222222222222e-05, |
|
"loss": 1.7177, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 1.4222885213857668, |
|
"grad_norm": 1.916225552558899, |
|
"learning_rate": 7.309171717171718e-05, |
|
"loss": 1.7019, |
|
"step": 276500 |
|
}, |
|
{ |
|
"epoch": 1.4248604716956867, |
|
"grad_norm": 3.040851354598999, |
|
"learning_rate": 7.304121212121212e-05, |
|
"loss": 1.7041, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 1.427432422005607, |
|
"grad_norm": 2.5603034496307373, |
|
"learning_rate": 7.299070707070707e-05, |
|
"loss": 1.7071, |
|
"step": 277500 |
|
}, |
|
{ |
|
"epoch": 1.430004372315527, |
|
"grad_norm": 3.5265140533447266, |
|
"learning_rate": 7.294030303030304e-05, |
|
"loss": 1.711, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 1.4325763226254469, |
|
"grad_norm": 2.5686593055725098, |
|
"learning_rate": 7.2889898989899e-05, |
|
"loss": 1.7125, |
|
"step": 278500 |
|
}, |
|
{ |
|
"epoch": 1.4351482729353668, |
|
"grad_norm": 2.419116735458374, |
|
"learning_rate": 7.283939393939393e-05, |
|
"loss": 1.702, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 1.4377202232452868, |
|
"grad_norm": 2.6491827964782715, |
|
"learning_rate": 7.27888888888889e-05, |
|
"loss": 1.7198, |
|
"step": 279500 |
|
}, |
|
{ |
|
"epoch": 1.440292173555207, |
|
"grad_norm": 2.181264638900757, |
|
"learning_rate": 7.273838383838384e-05, |
|
"loss": 1.7124, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 1.442864123865127, |
|
"grad_norm": 2.609100580215454, |
|
"learning_rate": 7.268787878787879e-05, |
|
"loss": 1.727, |
|
"step": 280500 |
|
}, |
|
{ |
|
"epoch": 1.445436074175047, |
|
"grad_norm": 2.866640329360962, |
|
"learning_rate": 7.263747474747476e-05, |
|
"loss": 1.7117, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 1.448008024484967, |
|
"grad_norm": 2.657816171646118, |
|
"learning_rate": 7.25869696969697e-05, |
|
"loss": 1.7248, |
|
"step": 281500 |
|
}, |
|
{ |
|
"epoch": 1.4505799747948869, |
|
"grad_norm": 2.376187801361084, |
|
"learning_rate": 7.253646464646465e-05, |
|
"loss": 1.7056, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 1.453151925104807, |
|
"grad_norm": 2.379953622817993, |
|
"learning_rate": 7.24859595959596e-05, |
|
"loss": 1.7167, |
|
"step": 282500 |
|
}, |
|
{ |
|
"epoch": 1.455723875414727, |
|
"grad_norm": 2.7846200466156006, |
|
"learning_rate": 7.243545454545455e-05, |
|
"loss": 1.7134, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 1.458295825724647, |
|
"grad_norm": 2.3728222846984863, |
|
"learning_rate": 7.238494949494949e-05, |
|
"loss": 1.6974, |
|
"step": 283500 |
|
}, |
|
{ |
|
"epoch": 1.460867776034567, |
|
"grad_norm": 2.185354232788086, |
|
"learning_rate": 7.233444444444445e-05, |
|
"loss": 1.7095, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 1.463439726344487, |
|
"grad_norm": 2.393312454223633, |
|
"learning_rate": 7.22839393939394e-05, |
|
"loss": 1.6992, |
|
"step": 284500 |
|
}, |
|
{ |
|
"epoch": 1.4660116766544071, |
|
"grad_norm": 2.4728591442108154, |
|
"learning_rate": 7.223343434343434e-05, |
|
"loss": 1.7096, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 1.468583626964327, |
|
"grad_norm": 2.379149913787842, |
|
"learning_rate": 7.21830303030303e-05, |
|
"loss": 1.7051, |
|
"step": 285500 |
|
}, |
|
{ |
|
"epoch": 1.471155577274247, |
|
"grad_norm": 2.3946895599365234, |
|
"learning_rate": 7.213252525252525e-05, |
|
"loss": 1.7051, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 1.473727527584167, |
|
"grad_norm": 2.4574227333068848, |
|
"learning_rate": 7.208202020202021e-05, |
|
"loss": 1.7048, |
|
"step": 286500 |
|
}, |
|
{ |
|
"epoch": 1.476299477894087, |
|
"grad_norm": 2.5250046253204346, |
|
"learning_rate": 7.203151515151514e-05, |
|
"loss": 1.7008, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 1.4788714282040072, |
|
"grad_norm": 2.5990653038024902, |
|
"learning_rate": 7.198111111111112e-05, |
|
"loss": 1.6975, |
|
"step": 287500 |
|
}, |
|
{ |
|
"epoch": 1.4814433785139272, |
|
"grad_norm": 2.3256866931915283, |
|
"learning_rate": 7.193060606060606e-05, |
|
"loss": 1.6982, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 1.4840153288238471, |
|
"grad_norm": 2.4116110801696777, |
|
"learning_rate": 7.188010101010101e-05, |
|
"loss": 1.7023, |
|
"step": 288500 |
|
}, |
|
{ |
|
"epoch": 1.486587279133767, |
|
"grad_norm": 2.2912509441375732, |
|
"learning_rate": 7.182959595959597e-05, |
|
"loss": 1.6999, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 1.489159229443687, |
|
"grad_norm": 2.7787649631500244, |
|
"learning_rate": 7.177909090909092e-05, |
|
"loss": 1.6979, |
|
"step": 289500 |
|
}, |
|
{ |
|
"epoch": 1.4917311797536073, |
|
"grad_norm": 2.0487236976623535, |
|
"learning_rate": 7.172858585858586e-05, |
|
"loss": 1.697, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 1.4943031300635272, |
|
"grad_norm": 2.3088083267211914, |
|
"learning_rate": 7.167808080808082e-05, |
|
"loss": 1.6906, |
|
"step": 290500 |
|
}, |
|
{ |
|
"epoch": 1.4968750803734472, |
|
"grad_norm": 2.1930689811706543, |
|
"learning_rate": 7.162767676767677e-05, |
|
"loss": 1.71, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 1.4994470306833672, |
|
"grad_norm": 2.6284825801849365, |
|
"learning_rate": 7.157717171717171e-05, |
|
"loss": 1.704, |
|
"step": 291500 |
|
}, |
|
{ |
|
"epoch": 1.5020189809932871, |
|
"grad_norm": 2.0390841960906982, |
|
"learning_rate": 7.152676767676769e-05, |
|
"loss": 1.7005, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 1.5045909313032073, |
|
"grad_norm": 2.472266674041748, |
|
"learning_rate": 7.147626262626262e-05, |
|
"loss": 1.6911, |
|
"step": 292500 |
|
}, |
|
{ |
|
"epoch": 1.5071628816131273, |
|
"grad_norm": 2.0675249099731445, |
|
"learning_rate": 7.142575757575758e-05, |
|
"loss": 1.7018, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 1.5097348319230472, |
|
"grad_norm": 2.693594217300415, |
|
"learning_rate": 7.137525252525254e-05, |
|
"loss": 1.6849, |
|
"step": 293500 |
|
}, |
|
{ |
|
"epoch": 1.5123067822329672, |
|
"grad_norm": 2.4996039867401123, |
|
"learning_rate": 7.132474747474747e-05, |
|
"loss": 1.7032, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 1.5148787325428872, |
|
"grad_norm": 2.3143088817596436, |
|
"learning_rate": 7.127424242424243e-05, |
|
"loss": 1.702, |
|
"step": 294500 |
|
}, |
|
{ |
|
"epoch": 1.5174506828528074, |
|
"grad_norm": 2.636171340942383, |
|
"learning_rate": 7.122373737373738e-05, |
|
"loss": 1.6903, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 1.5200226331627273, |
|
"grad_norm": 2.3447632789611816, |
|
"learning_rate": 7.117323232323233e-05, |
|
"loss": 1.7032, |
|
"step": 295500 |
|
}, |
|
{ |
|
"epoch": 1.5225945834726473, |
|
"grad_norm": 1.977137565612793, |
|
"learning_rate": 7.112272727272727e-05, |
|
"loss": 1.6845, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 1.5251665337825673, |
|
"grad_norm": 2.250196695327759, |
|
"learning_rate": 7.107232323232323e-05, |
|
"loss": 1.687, |
|
"step": 296500 |
|
}, |
|
{ |
|
"epoch": 1.5277384840924872, |
|
"grad_norm": 2.750044345855713, |
|
"learning_rate": 7.102181818181819e-05, |
|
"loss": 1.6999, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 1.5303104344024074, |
|
"grad_norm": 2.4571657180786133, |
|
"learning_rate": 7.097131313131314e-05, |
|
"loss": 1.6919, |
|
"step": 297500 |
|
}, |
|
{ |
|
"epoch": 1.5328823847123274, |
|
"grad_norm": 2.9166290760040283, |
|
"learning_rate": 7.092080808080808e-05, |
|
"loss": 1.6857, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 1.5354543350222474, |
|
"grad_norm": 2.9264209270477295, |
|
"learning_rate": 7.087040404040404e-05, |
|
"loss": 1.6778, |
|
"step": 298500 |
|
}, |
|
{ |
|
"epoch": 1.5380262853321673, |
|
"grad_norm": 2.910644769668579, |
|
"learning_rate": 7.081989898989899e-05, |
|
"loss": 1.6869, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 1.5405982356420873, |
|
"grad_norm": 2.3062753677368164, |
|
"learning_rate": 7.076939393939395e-05, |
|
"loss": 1.707, |
|
"step": 299500 |
|
}, |
|
{ |
|
"epoch": 1.5431701859520075, |
|
"grad_norm": 2.345658302307129, |
|
"learning_rate": 7.07188888888889e-05, |
|
"loss": 1.6887, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 1.5457421362619275, |
|
"grad_norm": 2.5615222454071045, |
|
"learning_rate": 7.066838383838384e-05, |
|
"loss": 1.6918, |
|
"step": 300500 |
|
}, |
|
{ |
|
"epoch": 1.5483140865718474, |
|
"grad_norm": 2.4387075901031494, |
|
"learning_rate": 7.061787878787879e-05, |
|
"loss": 1.6967, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 1.5508860368817674, |
|
"grad_norm": 2.2662642002105713, |
|
"learning_rate": 7.056737373737375e-05, |
|
"loss": 1.7053, |
|
"step": 301500 |
|
}, |
|
{ |
|
"epoch": 1.5534579871916874, |
|
"grad_norm": 2.526573896408081, |
|
"learning_rate": 7.051686868686868e-05, |
|
"loss": 1.6866, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 1.5560299375016076, |
|
"grad_norm": 2.2950527667999268, |
|
"learning_rate": 7.046656565656567e-05, |
|
"loss": 1.683, |
|
"step": 302500 |
|
}, |
|
{ |
|
"epoch": 1.5586018878115275, |
|
"grad_norm": 2.3456244468688965, |
|
"learning_rate": 7.041606060606061e-05, |
|
"loss": 1.6864, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 1.5611738381214475, |
|
"grad_norm": 2.326719284057617, |
|
"learning_rate": 7.036555555555556e-05, |
|
"loss": 1.6894, |
|
"step": 303500 |
|
}, |
|
{ |
|
"epoch": 1.5637457884313675, |
|
"grad_norm": 2.5892398357391357, |
|
"learning_rate": 7.031515151515152e-05, |
|
"loss": 1.6853, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 1.5663177387412874, |
|
"grad_norm": 2.476912260055542, |
|
"learning_rate": 7.026464646464647e-05, |
|
"loss": 1.6815, |
|
"step": 304500 |
|
}, |
|
{ |
|
"epoch": 1.5688896890512076, |
|
"grad_norm": 2.147064685821533, |
|
"learning_rate": 7.021414141414143e-05, |
|
"loss": 1.6883, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 1.5714616393611276, |
|
"grad_norm": 2.761141061782837, |
|
"learning_rate": 7.016363636363636e-05, |
|
"loss": 1.679, |
|
"step": 305500 |
|
}, |
|
{ |
|
"epoch": 1.5740335896710476, |
|
"grad_norm": 2.316796064376831, |
|
"learning_rate": 7.011313131313132e-05, |
|
"loss": 1.6925, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 1.5766055399809675, |
|
"grad_norm": 2.4468626976013184, |
|
"learning_rate": 7.006262626262627e-05, |
|
"loss": 1.6923, |
|
"step": 306500 |
|
}, |
|
{ |
|
"epoch": 1.5791774902908875, |
|
"grad_norm": 2.4432520866394043, |
|
"learning_rate": 7.001212121212121e-05, |
|
"loss": 1.6863, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 1.5817494406008077, |
|
"grad_norm": 2.5849692821502686, |
|
"learning_rate": 6.996161616161616e-05, |
|
"loss": 1.6831, |
|
"step": 307500 |
|
}, |
|
{ |
|
"epoch": 1.5843213909107277, |
|
"grad_norm": 2.266772985458374, |
|
"learning_rate": 6.991111111111112e-05, |
|
"loss": 1.6821, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 1.5868933412206476, |
|
"grad_norm": 2.161853313446045, |
|
"learning_rate": 6.986060606060606e-05, |
|
"loss": 1.6805, |
|
"step": 308500 |
|
}, |
|
{ |
|
"epoch": 1.5894652915305676, |
|
"grad_norm": 2.5699236392974854, |
|
"learning_rate": 6.981010101010101e-05, |
|
"loss": 1.6879, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 1.5920372418404876, |
|
"grad_norm": 2.3673970699310303, |
|
"learning_rate": 6.975969696969697e-05, |
|
"loss": 1.6765, |
|
"step": 309500 |
|
}, |
|
{ |
|
"epoch": 1.5946091921504078, |
|
"grad_norm": 2.225632667541504, |
|
"learning_rate": 6.970919191919192e-05, |
|
"loss": 1.6847, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 1.5971811424603277, |
|
"grad_norm": 2.2272884845733643, |
|
"learning_rate": 6.965868686868688e-05, |
|
"loss": 1.6769, |
|
"step": 310500 |
|
}, |
|
{ |
|
"epoch": 1.5997530927702477, |
|
"grad_norm": 2.319474458694458, |
|
"learning_rate": 6.960818181818182e-05, |
|
"loss": 1.6831, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 1.6023250430801677, |
|
"grad_norm": 2.1718974113464355, |
|
"learning_rate": 6.955767676767677e-05, |
|
"loss": 1.6638, |
|
"step": 311500 |
|
}, |
|
{ |
|
"epoch": 1.6048969933900876, |
|
"grad_norm": 2.3438401222229004, |
|
"learning_rate": 6.950717171717172e-05, |
|
"loss": 1.6737, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 1.6074689437000078, |
|
"grad_norm": 1.9681246280670166, |
|
"learning_rate": 6.945666666666668e-05, |
|
"loss": 1.6682, |
|
"step": 312500 |
|
}, |
|
{ |
|
"epoch": 1.6100408940099278, |
|
"grad_norm": 2.5999867916107178, |
|
"learning_rate": 6.940616161616162e-05, |
|
"loss": 1.6861, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 1.6126128443198477, |
|
"grad_norm": 2.4516825675964355, |
|
"learning_rate": 6.935575757575757e-05, |
|
"loss": 1.6838, |
|
"step": 313500 |
|
}, |
|
{ |
|
"epoch": 1.6151847946297677, |
|
"grad_norm": 2.1580958366394043, |
|
"learning_rate": 6.930525252525253e-05, |
|
"loss": 1.6751, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 1.6177567449396877, |
|
"grad_norm": 2.6636695861816406, |
|
"learning_rate": 6.925474747474749e-05, |
|
"loss": 1.6781, |
|
"step": 314500 |
|
}, |
|
{ |
|
"epoch": 1.6203286952496079, |
|
"grad_norm": 2.1307785511016846, |
|
"learning_rate": 6.920424242424242e-05, |
|
"loss": 1.6763, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 1.6229006455595278, |
|
"grad_norm": 2.4927167892456055, |
|
"learning_rate": 6.91538383838384e-05, |
|
"loss": 1.6755, |
|
"step": 315500 |
|
}, |
|
{ |
|
"epoch": 1.6254725958694478, |
|
"grad_norm": 1.9655892848968506, |
|
"learning_rate": 6.910333333333334e-05, |
|
"loss": 1.6839, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 1.6280445461793678, |
|
"grad_norm": 2.2941057682037354, |
|
"learning_rate": 6.905282828282829e-05, |
|
"loss": 1.6739, |
|
"step": 316500 |
|
}, |
|
{ |
|
"epoch": 1.6306164964892877, |
|
"grad_norm": 2.4142115116119385, |
|
"learning_rate": 6.900232323232325e-05, |
|
"loss": 1.6843, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 1.633188446799208, |
|
"grad_norm": 2.138962745666504, |
|
"learning_rate": 6.895181818181818e-05, |
|
"loss": 1.6809, |
|
"step": 317500 |
|
}, |
|
{ |
|
"epoch": 1.635760397109128, |
|
"grad_norm": 2.6460509300231934, |
|
"learning_rate": 6.890131313131314e-05, |
|
"loss": 1.6733, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 1.6383323474190479, |
|
"grad_norm": 2.2773749828338623, |
|
"learning_rate": 6.885080808080809e-05, |
|
"loss": 1.6671, |
|
"step": 318500 |
|
}, |
|
{ |
|
"epoch": 1.6409042977289678, |
|
"grad_norm": 2.1762917041778564, |
|
"learning_rate": 6.880030303030303e-05, |
|
"loss": 1.6649, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 1.6434762480388878, |
|
"grad_norm": 2.4022064208984375, |
|
"learning_rate": 6.874989898989899e-05, |
|
"loss": 1.6667, |
|
"step": 319500 |
|
}, |
|
{ |
|
"epoch": 1.646048198348808, |
|
"grad_norm": 2.392923355102539, |
|
"learning_rate": 6.869939393939394e-05, |
|
"loss": 1.6735, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 1.648620148658728, |
|
"grad_norm": 2.8275463581085205, |
|
"learning_rate": 6.86488888888889e-05, |
|
"loss": 1.667, |
|
"step": 320500 |
|
}, |
|
{ |
|
"epoch": 1.651192098968648, |
|
"grad_norm": 2.8365330696105957, |
|
"learning_rate": 6.859838383838384e-05, |
|
"loss": 1.6766, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 1.653764049278568, |
|
"grad_norm": 2.6010117530822754, |
|
"learning_rate": 6.854787878787879e-05, |
|
"loss": 1.6707, |
|
"step": 321500 |
|
}, |
|
{ |
|
"epoch": 1.6563359995884879, |
|
"grad_norm": 2.6623294353485107, |
|
"learning_rate": 6.849747474747475e-05, |
|
"loss": 1.6676, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 1.658907949898408, |
|
"grad_norm": 2.760723114013672, |
|
"learning_rate": 6.844707070707071e-05, |
|
"loss": 1.6634, |
|
"step": 322500 |
|
}, |
|
{ |
|
"epoch": 1.6614799002083278, |
|
"grad_norm": 2.240460157394409, |
|
"learning_rate": 6.839656565656566e-05, |
|
"loss": 1.6613, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 1.664051850518248, |
|
"grad_norm": 2.0668253898620605, |
|
"learning_rate": 6.834606060606062e-05, |
|
"loss": 1.6664, |
|
"step": 323500 |
|
}, |
|
{ |
|
"epoch": 1.666623800828168, |
|
"grad_norm": 2.19256329536438, |
|
"learning_rate": 6.829555555555556e-05, |
|
"loss": 1.6632, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 1.669195751138088, |
|
"grad_norm": 2.7215864658355713, |
|
"learning_rate": 6.824505050505051e-05, |
|
"loss": 1.6662, |
|
"step": 324500 |
|
}, |
|
{ |
|
"epoch": 1.6717677014480081, |
|
"grad_norm": 2.0605878829956055, |
|
"learning_rate": 6.819454545454545e-05, |
|
"loss": 1.6615, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 1.6743396517579279, |
|
"grad_norm": 2.1403868198394775, |
|
"learning_rate": 6.814404040404041e-05, |
|
"loss": 1.6798, |
|
"step": 325500 |
|
}, |
|
{ |
|
"epoch": 1.676911602067848, |
|
"grad_norm": 2.322628974914551, |
|
"learning_rate": 6.809353535353535e-05, |
|
"loss": 1.6739, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 1.679483552377768, |
|
"grad_norm": 2.2708230018615723, |
|
"learning_rate": 6.804303030303031e-05, |
|
"loss": 1.6635, |
|
"step": 326500 |
|
}, |
|
{ |
|
"epoch": 1.682055502687688, |
|
"grad_norm": 2.4940547943115234, |
|
"learning_rate": 6.799252525252525e-05, |
|
"loss": 1.6679, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 1.6846274529976082, |
|
"grad_norm": 2.149888038635254, |
|
"learning_rate": 6.794202020202021e-05, |
|
"loss": 1.6773, |
|
"step": 327500 |
|
}, |
|
{ |
|
"epoch": 1.687199403307528, |
|
"grad_norm": 2.544126272201538, |
|
"learning_rate": 6.789151515151515e-05, |
|
"loss": 1.6754, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 1.6897713536174481, |
|
"grad_norm": 2.3829123973846436, |
|
"learning_rate": 6.78410101010101e-05, |
|
"loss": 1.665, |
|
"step": 328500 |
|
}, |
|
{ |
|
"epoch": 1.692343303927368, |
|
"grad_norm": 2.3244376182556152, |
|
"learning_rate": 6.779060606060607e-05, |
|
"loss": 1.6724, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 1.694915254237288, |
|
"grad_norm": 2.288402557373047, |
|
"learning_rate": 6.774010101010101e-05, |
|
"loss": 1.6534, |
|
"step": 329500 |
|
}, |
|
{ |
|
"epoch": 1.6974872045472083, |
|
"grad_norm": 2.2815768718719482, |
|
"learning_rate": 6.768969696969697e-05, |
|
"loss": 1.6664, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 1.700059154857128, |
|
"grad_norm": 2.458909749984741, |
|
"learning_rate": 6.763929292929293e-05, |
|
"loss": 1.669, |
|
"step": 330500 |
|
}, |
|
{ |
|
"epoch": 1.7026311051670482, |
|
"grad_norm": 2.744945764541626, |
|
"learning_rate": 6.758878787878789e-05, |
|
"loss": 1.6688, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 1.7052030554769682, |
|
"grad_norm": 2.7508599758148193, |
|
"learning_rate": 6.753828282828282e-05, |
|
"loss": 1.6562, |
|
"step": 331500 |
|
}, |
|
{ |
|
"epoch": 1.7077750057868881, |
|
"grad_norm": 2.8219707012176514, |
|
"learning_rate": 6.748777777777778e-05, |
|
"loss": 1.6542, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 1.7103469560968083, |
|
"grad_norm": 2.6453421115875244, |
|
"learning_rate": 6.743727272727273e-05, |
|
"loss": 1.6508, |
|
"step": 332500 |
|
}, |
|
{ |
|
"epoch": 1.712918906406728, |
|
"grad_norm": 2.9267029762268066, |
|
"learning_rate": 6.738676767676768e-05, |
|
"loss": 1.6559, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 1.7154908567166482, |
|
"grad_norm": 2.5373966693878174, |
|
"learning_rate": 6.733626262626262e-05, |
|
"loss": 1.6683, |
|
"step": 333500 |
|
}, |
|
{ |
|
"epoch": 1.7180628070265682, |
|
"grad_norm": 2.3234028816223145, |
|
"learning_rate": 6.728575757575758e-05, |
|
"loss": 1.655, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 1.7206347573364882, |
|
"grad_norm": 2.189422845840454, |
|
"learning_rate": 6.723525252525253e-05, |
|
"loss": 1.6492, |
|
"step": 334500 |
|
}, |
|
{ |
|
"epoch": 1.7232067076464084, |
|
"grad_norm": 2.491847038269043, |
|
"learning_rate": 6.718474747474748e-05, |
|
"loss": 1.6372, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 1.7257786579563281, |
|
"grad_norm": 3.009021759033203, |
|
"learning_rate": 6.713424242424244e-05, |
|
"loss": 1.6486, |
|
"step": 335500 |
|
}, |
|
{ |
|
"epoch": 1.7283506082662483, |
|
"grad_norm": 2.40120005607605, |
|
"learning_rate": 6.708373737373738e-05, |
|
"loss": 1.651, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 1.7309225585761683, |
|
"grad_norm": 2.661926746368408, |
|
"learning_rate": 6.703323232323233e-05, |
|
"loss": 1.663, |
|
"step": 336500 |
|
}, |
|
{ |
|
"epoch": 1.7334945088860882, |
|
"grad_norm": 2.7393829822540283, |
|
"learning_rate": 6.698272727272727e-05, |
|
"loss": 1.6435, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 1.7360664591960084, |
|
"grad_norm": 2.4835827350616455, |
|
"learning_rate": 6.693222222222223e-05, |
|
"loss": 1.6592, |
|
"step": 337500 |
|
}, |
|
{ |
|
"epoch": 1.7386384095059282, |
|
"grad_norm": 2.1766092777252197, |
|
"learning_rate": 6.68818181818182e-05, |
|
"loss": 1.6624, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 1.7412103598158484, |
|
"grad_norm": 2.023101329803467, |
|
"learning_rate": 6.683131313131314e-05, |
|
"loss": 1.6464, |
|
"step": 338500 |
|
}, |
|
{ |
|
"epoch": 1.7437823101257683, |
|
"grad_norm": 2.04542875289917, |
|
"learning_rate": 6.678080808080809e-05, |
|
"loss": 1.6598, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 1.7463542604356883, |
|
"grad_norm": 2.204482078552246, |
|
"learning_rate": 6.673030303030303e-05, |
|
"loss": 1.6412, |
|
"step": 339500 |
|
}, |
|
{ |
|
"epoch": 1.7489262107456085, |
|
"grad_norm": 2.304865598678589, |
|
"learning_rate": 6.667979797979799e-05, |
|
"loss": 1.6596, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 1.7514981610555282, |
|
"grad_norm": 2.291093349456787, |
|
"learning_rate": 6.662929292929293e-05, |
|
"loss": 1.6641, |
|
"step": 340500 |
|
}, |
|
{ |
|
"epoch": 1.7540701113654484, |
|
"grad_norm": 2.821134328842163, |
|
"learning_rate": 6.657878787878789e-05, |
|
"loss": 1.6525, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 1.7566420616753684, |
|
"grad_norm": 2.6450328826904297, |
|
"learning_rate": 6.652838383838384e-05, |
|
"loss": 1.6559, |
|
"step": 341500 |
|
}, |
|
{ |
|
"epoch": 1.7592140119852884, |
|
"grad_norm": 2.166497230529785, |
|
"learning_rate": 6.647787878787879e-05, |
|
"loss": 1.6591, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 1.7617859622952086, |
|
"grad_norm": 2.3948822021484375, |
|
"learning_rate": 6.642737373737374e-05, |
|
"loss": 1.6536, |
|
"step": 342500 |
|
}, |
|
{ |
|
"epoch": 1.7643579126051283, |
|
"grad_norm": 2.443253517150879, |
|
"learning_rate": 6.637686868686868e-05, |
|
"loss": 1.6489, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 1.7669298629150485, |
|
"grad_norm": 2.701960802078247, |
|
"learning_rate": 6.632636363636364e-05, |
|
"loss": 1.6575, |
|
"step": 343500 |
|
}, |
|
{ |
|
"epoch": 1.7695018132249685, |
|
"grad_norm": 2.5581912994384766, |
|
"learning_rate": 6.627585858585859e-05, |
|
"loss": 1.6558, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 1.7720737635348884, |
|
"grad_norm": 2.5111706256866455, |
|
"learning_rate": 6.622535353535354e-05, |
|
"loss": 1.6555, |
|
"step": 344500 |
|
}, |
|
{ |
|
"epoch": 1.7746457138448086, |
|
"grad_norm": 2.4795475006103516, |
|
"learning_rate": 6.617484848484848e-05, |
|
"loss": 1.6484, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 1.7772176641547284, |
|
"grad_norm": 2.4566597938537598, |
|
"learning_rate": 6.612444444444444e-05, |
|
"loss": 1.6532, |
|
"step": 345500 |
|
}, |
|
{ |
|
"epoch": 1.7797896144646486, |
|
"grad_norm": 2.694000005722046, |
|
"learning_rate": 6.60739393939394e-05, |
|
"loss": 1.6398, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 1.7823615647745685, |
|
"grad_norm": 2.3903775215148926, |
|
"learning_rate": 6.602343434343435e-05, |
|
"loss": 1.66, |
|
"step": 346500 |
|
}, |
|
{ |
|
"epoch": 1.7849335150844885, |
|
"grad_norm": 2.5123212337493896, |
|
"learning_rate": 6.59729292929293e-05, |
|
"loss": 1.6551, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 1.7875054653944087, |
|
"grad_norm": 2.346447467803955, |
|
"learning_rate": 6.592242424242424e-05, |
|
"loss": 1.664, |
|
"step": 347500 |
|
}, |
|
{ |
|
"epoch": 1.7900774157043284, |
|
"grad_norm": 2.535243034362793, |
|
"learning_rate": 6.58719191919192e-05, |
|
"loss": 1.6504, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 1.7926493660142486, |
|
"grad_norm": 2.1878671646118164, |
|
"learning_rate": 6.582141414141413e-05, |
|
"loss": 1.6465, |
|
"step": 348500 |
|
}, |
|
{ |
|
"epoch": 1.7952213163241686, |
|
"grad_norm": 1.969903826713562, |
|
"learning_rate": 6.57709090909091e-05, |
|
"loss": 1.6593, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 1.7977932666340886, |
|
"grad_norm": 2.7635295391082764, |
|
"learning_rate": 6.572050505050505e-05, |
|
"loss": 1.6487, |
|
"step": 349500 |
|
}, |
|
{ |
|
"epoch": 1.8003652169440088, |
|
"grad_norm": 2.6183090209960938, |
|
"learning_rate": 6.567010101010101e-05, |
|
"loss": 1.6512, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 1.8029371672539285, |
|
"grad_norm": 2.6972358226776123, |
|
"learning_rate": 6.561959595959596e-05, |
|
"loss": 1.6441, |
|
"step": 350500 |
|
}, |
|
{ |
|
"epoch": 1.8055091175638487, |
|
"grad_norm": 2.986240863800049, |
|
"learning_rate": 6.556909090909092e-05, |
|
"loss": 1.6467, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 1.8080810678737687, |
|
"grad_norm": 2.5499420166015625, |
|
"learning_rate": 6.551858585858585e-05, |
|
"loss": 1.6356, |
|
"step": 351500 |
|
}, |
|
{ |
|
"epoch": 1.8106530181836886, |
|
"grad_norm": 2.5218753814697266, |
|
"learning_rate": 6.546808080808081e-05, |
|
"loss": 1.6519, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 1.8132249684936088, |
|
"grad_norm": 2.1634602546691895, |
|
"learning_rate": 6.541767676767677e-05, |
|
"loss": 1.6409, |
|
"step": 352500 |
|
}, |
|
{ |
|
"epoch": 1.8157969188035286, |
|
"grad_norm": 1.9278182983398438, |
|
"learning_rate": 6.536717171717172e-05, |
|
"loss": 1.6321, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 1.8183688691134487, |
|
"grad_norm": 2.819406509399414, |
|
"learning_rate": 6.531666666666666e-05, |
|
"loss": 1.6444, |
|
"step": 353500 |
|
}, |
|
{ |
|
"epoch": 1.8209408194233687, |
|
"grad_norm": 2.276034116744995, |
|
"learning_rate": 6.526616161616161e-05, |
|
"loss": 1.6417, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 1.8235127697332887, |
|
"grad_norm": 1.9764829874038696, |
|
"learning_rate": 6.521565656565657e-05, |
|
"loss": 1.6306, |
|
"step": 354500 |
|
}, |
|
{ |
|
"epoch": 1.8260847200432089, |
|
"grad_norm": 1.9372199773788452, |
|
"learning_rate": 6.516515151515152e-05, |
|
"loss": 1.6447, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 1.8286566703531286, |
|
"grad_norm": 2.0721209049224854, |
|
"learning_rate": 6.511464646464646e-05, |
|
"loss": 1.6436, |
|
"step": 355500 |
|
}, |
|
{ |
|
"epoch": 1.8312286206630488, |
|
"grad_norm": 2.5440256595611572, |
|
"learning_rate": 6.506414141414142e-05, |
|
"loss": 1.652, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 1.8338005709729688, |
|
"grad_norm": 2.4953465461730957, |
|
"learning_rate": 6.501373737373738e-05, |
|
"loss": 1.6449, |
|
"step": 356500 |
|
}, |
|
{ |
|
"epoch": 1.8363725212828887, |
|
"grad_norm": 2.357142686843872, |
|
"learning_rate": 6.496323232323233e-05, |
|
"loss": 1.6506, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 1.838944471592809, |
|
"grad_norm": 2.122255325317383, |
|
"learning_rate": 6.491272727272728e-05, |
|
"loss": 1.6472, |
|
"step": 357500 |
|
}, |
|
{ |
|
"epoch": 1.8415164219027287, |
|
"grad_norm": 2.392409324645996, |
|
"learning_rate": 6.486222222222222e-05, |
|
"loss": 1.6321, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 1.8440883722126489, |
|
"grad_norm": 2.4341251850128174, |
|
"learning_rate": 6.481181818181818e-05, |
|
"loss": 1.6351, |
|
"step": 358500 |
|
}, |
|
{ |
|
"epoch": 1.8466603225225688, |
|
"grad_norm": 2.6125593185424805, |
|
"learning_rate": 6.476131313131314e-05, |
|
"loss": 1.632, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 1.8492322728324888, |
|
"grad_norm": 2.6240487098693848, |
|
"learning_rate": 6.471090909090909e-05, |
|
"loss": 1.6358, |
|
"step": 359500 |
|
}, |
|
{ |
|
"epoch": 1.851804223142409, |
|
"grad_norm": 2.084984540939331, |
|
"learning_rate": 6.466050505050505e-05, |
|
"loss": 1.6346, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 1.8543761734523287, |
|
"grad_norm": 2.0900211334228516, |
|
"learning_rate": 6.461e-05, |
|
"loss": 1.642, |
|
"step": 360500 |
|
}, |
|
{ |
|
"epoch": 1.856948123762249, |
|
"grad_norm": 2.4863033294677734, |
|
"learning_rate": 6.455949494949495e-05, |
|
"loss": 1.6405, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 1.859520074072169, |
|
"grad_norm": 2.3600735664367676, |
|
"learning_rate": 6.45089898989899e-05, |
|
"loss": 1.6417, |
|
"step": 361500 |
|
}, |
|
{ |
|
"epoch": 1.8620920243820889, |
|
"grad_norm": 2.359057664871216, |
|
"learning_rate": 6.445848484848486e-05, |
|
"loss": 1.6421, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 1.864663974692009, |
|
"grad_norm": 2.2243077754974365, |
|
"learning_rate": 6.44079797979798e-05, |
|
"loss": 1.6285, |
|
"step": 362500 |
|
}, |
|
{ |
|
"epoch": 1.8672359250019288, |
|
"grad_norm": 2.724112033843994, |
|
"learning_rate": 6.435747474747475e-05, |
|
"loss": 1.6185, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 1.869807875311849, |
|
"grad_norm": 2.4706525802612305, |
|
"learning_rate": 6.43069696969697e-05, |
|
"loss": 1.6416, |
|
"step": 363500 |
|
}, |
|
{ |
|
"epoch": 1.872379825621769, |
|
"grad_norm": 2.599776268005371, |
|
"learning_rate": 6.425646464646466e-05, |
|
"loss": 1.6355, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 1.874951775931689, |
|
"grad_norm": 2.1543681621551514, |
|
"learning_rate": 6.420595959595959e-05, |
|
"loss": 1.6432, |
|
"step": 364500 |
|
}, |
|
{ |
|
"epoch": 1.8775237262416091, |
|
"grad_norm": 2.042337417602539, |
|
"learning_rate": 6.415545454545455e-05, |
|
"loss": 1.6415, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 1.8800956765515289, |
|
"grad_norm": 2.3360307216644287, |
|
"learning_rate": 6.41049494949495e-05, |
|
"loss": 1.6353, |
|
"step": 365500 |
|
}, |
|
{ |
|
"epoch": 1.882667626861449, |
|
"grad_norm": 2.5931334495544434, |
|
"learning_rate": 6.405454545454546e-05, |
|
"loss": 1.6297, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 1.885239577171369, |
|
"grad_norm": 2.690889835357666, |
|
"learning_rate": 6.400404040404042e-05, |
|
"loss": 1.6346, |
|
"step": 366500 |
|
}, |
|
{ |
|
"epoch": 1.887811527481289, |
|
"grad_norm": 2.677400827407837, |
|
"learning_rate": 6.395353535353535e-05, |
|
"loss": 1.6375, |
|
"step": 367000 |
|
}, |
|
{ |
|
"epoch": 1.8903834777912092, |
|
"grad_norm": 2.1778125762939453, |
|
"learning_rate": 6.390303030303031e-05, |
|
"loss": 1.6205, |
|
"step": 367500 |
|
}, |
|
{ |
|
"epoch": 1.892955428101129, |
|
"grad_norm": 2.61460280418396, |
|
"learning_rate": 6.385262626262627e-05, |
|
"loss": 1.635, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 1.8955273784110491, |
|
"grad_norm": 2.425158739089966, |
|
"learning_rate": 6.380212121212122e-05, |
|
"loss": 1.628, |
|
"step": 368500 |
|
}, |
|
{ |
|
"epoch": 1.898099328720969, |
|
"grad_norm": 2.5733518600463867, |
|
"learning_rate": 6.375161616161616e-05, |
|
"loss": 1.6284, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 1.900671279030889, |
|
"grad_norm": 2.4769554138183594, |
|
"learning_rate": 6.370111111111111e-05, |
|
"loss": 1.6334, |
|
"step": 369500 |
|
}, |
|
{ |
|
"epoch": 1.9032432293408093, |
|
"grad_norm": 2.93058180809021, |
|
"learning_rate": 6.365060606060607e-05, |
|
"loss": 1.6353, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 1.905815179650729, |
|
"grad_norm": 2.4658243656158447, |
|
"learning_rate": 6.360010101010101e-05, |
|
"loss": 1.6291, |
|
"step": 370500 |
|
}, |
|
{ |
|
"epoch": 1.9083871299606492, |
|
"grad_norm": 2.2507095336914062, |
|
"learning_rate": 6.354959595959596e-05, |
|
"loss": 1.6405, |
|
"step": 371000 |
|
}, |
|
{ |
|
"epoch": 1.9109590802705692, |
|
"grad_norm": 2.3738880157470703, |
|
"learning_rate": 6.349909090909091e-05, |
|
"loss": 1.6309, |
|
"step": 371500 |
|
}, |
|
{ |
|
"epoch": 1.9135310305804891, |
|
"grad_norm": 2.008300304412842, |
|
"learning_rate": 6.344858585858587e-05, |
|
"loss": 1.6273, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 1.9161029808904093, |
|
"grad_norm": 2.2649285793304443, |
|
"learning_rate": 6.339818181818183e-05, |
|
"loss": 1.6134, |
|
"step": 372500 |
|
}, |
|
{ |
|
"epoch": 1.918674931200329, |
|
"grad_norm": 2.506477117538452, |
|
"learning_rate": 6.334767676767677e-05, |
|
"loss": 1.6283, |
|
"step": 373000 |
|
}, |
|
{ |
|
"epoch": 1.9212468815102492, |
|
"grad_norm": 2.661729335784912, |
|
"learning_rate": 6.329717171717172e-05, |
|
"loss": 1.6246, |
|
"step": 373500 |
|
}, |
|
{ |
|
"epoch": 1.9238188318201692, |
|
"grad_norm": 2.6854159832000732, |
|
"learning_rate": 6.324666666666667e-05, |
|
"loss": 1.6274, |
|
"step": 374000 |
|
}, |
|
{ |
|
"epoch": 1.9263907821300892, |
|
"grad_norm": 2.402884006500244, |
|
"learning_rate": 6.319616161616163e-05, |
|
"loss": 1.6208, |
|
"step": 374500 |
|
}, |
|
{ |
|
"epoch": 1.9289627324400094, |
|
"grad_norm": 2.1268699169158936, |
|
"learning_rate": 6.314565656565656e-05, |
|
"loss": 1.6306, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 1.9315346827499291, |
|
"grad_norm": 2.4067907333374023, |
|
"learning_rate": 6.309525252525252e-05, |
|
"loss": 1.6071, |
|
"step": 375500 |
|
}, |
|
{ |
|
"epoch": 1.9341066330598493, |
|
"grad_norm": 2.2865099906921387, |
|
"learning_rate": 6.304474747474748e-05, |
|
"loss": 1.623, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 1.9366785833697693, |
|
"grad_norm": 2.0596396923065186, |
|
"learning_rate": 6.299424242424242e-05, |
|
"loss": 1.6288, |
|
"step": 376500 |
|
}, |
|
{ |
|
"epoch": 1.9392505336796892, |
|
"grad_norm": 3.2876358032226562, |
|
"learning_rate": 6.294373737373738e-05, |
|
"loss": 1.635, |
|
"step": 377000 |
|
}, |
|
{ |
|
"epoch": 1.9418224839896094, |
|
"grad_norm": 2.7481908798217773, |
|
"learning_rate": 6.289333333333334e-05, |
|
"loss": 1.616, |
|
"step": 377500 |
|
}, |
|
{ |
|
"epoch": 1.9443944342995292, |
|
"grad_norm": 2.604656457901001, |
|
"learning_rate": 6.284282828282828e-05, |
|
"loss": 1.6255, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 1.9469663846094494, |
|
"grad_norm": 2.7096235752105713, |
|
"learning_rate": 6.279232323232324e-05, |
|
"loss": 1.627, |
|
"step": 378500 |
|
}, |
|
{ |
|
"epoch": 1.9495383349193693, |
|
"grad_norm": 2.6425135135650635, |
|
"learning_rate": 6.274181818181818e-05, |
|
"loss": 1.6308, |
|
"step": 379000 |
|
}, |
|
{ |
|
"epoch": 1.9521102852292893, |
|
"grad_norm": 2.2761101722717285, |
|
"learning_rate": 6.269141414141414e-05, |
|
"loss": 1.6328, |
|
"step": 379500 |
|
}, |
|
{ |
|
"epoch": 1.9546822355392095, |
|
"grad_norm": 2.5872933864593506, |
|
"learning_rate": 6.264090909090909e-05, |
|
"loss": 1.6326, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 1.9572541858491292, |
|
"grad_norm": 2.401745319366455, |
|
"learning_rate": 6.259040404040403e-05, |
|
"loss": 1.6179, |
|
"step": 380500 |
|
}, |
|
{ |
|
"epoch": 1.9598261361590494, |
|
"grad_norm": 2.335178852081299, |
|
"learning_rate": 6.2539898989899e-05, |
|
"loss": 1.6225, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 1.9623980864689694, |
|
"grad_norm": 2.1984500885009766, |
|
"learning_rate": 6.248939393939394e-05, |
|
"loss": 1.6226, |
|
"step": 381500 |
|
}, |
|
{ |
|
"epoch": 1.9649700367788894, |
|
"grad_norm": 2.53519606590271, |
|
"learning_rate": 6.243888888888889e-05, |
|
"loss": 1.624, |
|
"step": 382000 |
|
}, |
|
{ |
|
"epoch": 1.9675419870888096, |
|
"grad_norm": 2.1146388053894043, |
|
"learning_rate": 6.238838383838385e-05, |
|
"loss": 1.6336, |
|
"step": 382500 |
|
}, |
|
{ |
|
"epoch": 1.9701139373987293, |
|
"grad_norm": 2.4738714694976807, |
|
"learning_rate": 6.23378787878788e-05, |
|
"loss": 1.637, |
|
"step": 383000 |
|
}, |
|
{ |
|
"epoch": 1.9726858877086495, |
|
"grad_norm": 2.67535138130188, |
|
"learning_rate": 6.228737373737374e-05, |
|
"loss": 1.6248, |
|
"step": 383500 |
|
}, |
|
{ |
|
"epoch": 1.9752578380185695, |
|
"grad_norm": 2.1487460136413574, |
|
"learning_rate": 6.223686868686869e-05, |
|
"loss": 1.6279, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 1.9778297883284894, |
|
"grad_norm": 2.0736780166625977, |
|
"learning_rate": 6.218636363636365e-05, |
|
"loss": 1.6211, |
|
"step": 384500 |
|
}, |
|
{ |
|
"epoch": 1.9804017386384096, |
|
"grad_norm": 2.4359467029571533, |
|
"learning_rate": 6.21359595959596e-05, |
|
"loss": 1.6341, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 1.9829736889483294, |
|
"grad_norm": 2.57645845413208, |
|
"learning_rate": 6.208545454545455e-05, |
|
"loss": 1.6326, |
|
"step": 385500 |
|
}, |
|
{ |
|
"epoch": 1.9855456392582496, |
|
"grad_norm": 2.375304698944092, |
|
"learning_rate": 6.20349494949495e-05, |
|
"loss": 1.6338, |
|
"step": 386000 |
|
}, |
|
{ |
|
"epoch": 1.9881175895681695, |
|
"grad_norm": 2.1585114002227783, |
|
"learning_rate": 6.198444444444444e-05, |
|
"loss": 1.6152, |
|
"step": 386500 |
|
}, |
|
{ |
|
"epoch": 1.9906895398780895, |
|
"grad_norm": 2.393204689025879, |
|
"learning_rate": 6.19339393939394e-05, |
|
"loss": 1.6081, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 1.9932614901880097, |
|
"grad_norm": 2.543041706085205, |
|
"learning_rate": 6.188343434343434e-05, |
|
"loss": 1.6122, |
|
"step": 387500 |
|
}, |
|
{ |
|
"epoch": 1.9958334404979294, |
|
"grad_norm": 2.2563304901123047, |
|
"learning_rate": 6.18329292929293e-05, |
|
"loss": 1.6172, |
|
"step": 388000 |
|
}, |
|
{ |
|
"epoch": 1.9984053908078496, |
|
"grad_norm": 2.4522125720977783, |
|
"learning_rate": 6.178242424242424e-05, |
|
"loss": 1.6129, |
|
"step": 388500 |
|
}, |
|
{ |
|
"epoch": 2.00097734111777, |
|
"grad_norm": 2.579383611679077, |
|
"learning_rate": 6.17320202020202e-05, |
|
"loss": 1.6145, |
|
"step": 389000 |
|
}, |
|
{ |
|
"epoch": 2.0035492914276896, |
|
"grad_norm": 2.0245561599731445, |
|
"learning_rate": 6.168161616161616e-05, |
|
"loss": 1.6293, |
|
"step": 389500 |
|
}, |
|
{ |
|
"epoch": 2.0061212417376098, |
|
"grad_norm": 2.2552874088287354, |
|
"learning_rate": 6.163111111111112e-05, |
|
"loss": 1.606, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 2.0086931920475295, |
|
"grad_norm": 2.6959872245788574, |
|
"learning_rate": 6.158060606060606e-05, |
|
"loss": 1.6271, |
|
"step": 390500 |
|
}, |
|
{ |
|
"epoch": 2.0112651423574497, |
|
"grad_norm": 2.65429949760437, |
|
"learning_rate": 6.153010101010102e-05, |
|
"loss": 1.6144, |
|
"step": 391000 |
|
}, |
|
{ |
|
"epoch": 2.01383709266737, |
|
"grad_norm": 2.2554690837860107, |
|
"learning_rate": 6.147959595959596e-05, |
|
"loss": 1.614, |
|
"step": 391500 |
|
}, |
|
{ |
|
"epoch": 2.0164090429772896, |
|
"grad_norm": 2.3167171478271484, |
|
"learning_rate": 6.142909090909091e-05, |
|
"loss": 1.6128, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 2.01898099328721, |
|
"grad_norm": 2.5956623554229736, |
|
"learning_rate": 6.137858585858585e-05, |
|
"loss": 1.6034, |
|
"step": 392500 |
|
}, |
|
{ |
|
"epoch": 2.0215529435971296, |
|
"grad_norm": 2.968029260635376, |
|
"learning_rate": 6.132808080808081e-05, |
|
"loss": 1.6141, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 2.0241248939070497, |
|
"grad_norm": 2.7544617652893066, |
|
"learning_rate": 6.127767676767677e-05, |
|
"loss": 1.6214, |
|
"step": 393500 |
|
}, |
|
{ |
|
"epoch": 2.02669684421697, |
|
"grad_norm": 2.2742416858673096, |
|
"learning_rate": 6.122717171717172e-05, |
|
"loss": 1.6148, |
|
"step": 394000 |
|
}, |
|
{ |
|
"epoch": 2.0292687945268897, |
|
"grad_norm": 2.220961809158325, |
|
"learning_rate": 6.117666666666667e-05, |
|
"loss": 1.619, |
|
"step": 394500 |
|
}, |
|
{ |
|
"epoch": 2.03184074483681, |
|
"grad_norm": 2.195733070373535, |
|
"learning_rate": 6.112616161616161e-05, |
|
"loss": 1.616, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 2.0344126951467296, |
|
"grad_norm": 2.3462278842926025, |
|
"learning_rate": 6.107575757575757e-05, |
|
"loss": 1.6129, |
|
"step": 395500 |
|
}, |
|
{ |
|
"epoch": 2.03698464545665, |
|
"grad_norm": 2.70003604888916, |
|
"learning_rate": 6.102525252525253e-05, |
|
"loss": 1.6043, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 2.03955659576657, |
|
"grad_norm": 2.403668165206909, |
|
"learning_rate": 6.097474747474747e-05, |
|
"loss": 1.6184, |
|
"step": 396500 |
|
}, |
|
{ |
|
"epoch": 2.0421285460764897, |
|
"grad_norm": 2.6988089084625244, |
|
"learning_rate": 6.0924242424242425e-05, |
|
"loss": 1.5978, |
|
"step": 397000 |
|
}, |
|
{ |
|
"epoch": 2.04470049638641, |
|
"grad_norm": 2.7455625534057617, |
|
"learning_rate": 6.087383838383839e-05, |
|
"loss": 1.6167, |
|
"step": 397500 |
|
}, |
|
{ |
|
"epoch": 2.0472724466963297, |
|
"grad_norm": 2.071835994720459, |
|
"learning_rate": 6.082343434343435e-05, |
|
"loss": 1.6044, |
|
"step": 398000 |
|
}, |
|
{ |
|
"epoch": 2.04984439700625, |
|
"grad_norm": 2.2983603477478027, |
|
"learning_rate": 6.077303030303031e-05, |
|
"loss": 1.6122, |
|
"step": 398500 |
|
}, |
|
{ |
|
"epoch": 2.05241634731617, |
|
"grad_norm": 2.077721118927002, |
|
"learning_rate": 6.072252525252525e-05, |
|
"loss": 1.6174, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 2.05498829762609, |
|
"grad_norm": 2.942838430404663, |
|
"learning_rate": 6.0672121212121216e-05, |
|
"loss": 1.6065, |
|
"step": 399500 |
|
}, |
|
{ |
|
"epoch": 2.05756024793601, |
|
"grad_norm": 2.2567286491394043, |
|
"learning_rate": 6.062161616161617e-05, |
|
"loss": 1.5962, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 2.0601321982459297, |
|
"grad_norm": 2.995159149169922, |
|
"learning_rate": 6.057111111111111e-05, |
|
"loss": 1.5997, |
|
"step": 400500 |
|
}, |
|
{ |
|
"epoch": 2.06270414855585, |
|
"grad_norm": 2.48285174369812, |
|
"learning_rate": 6.052060606060607e-05, |
|
"loss": 1.6092, |
|
"step": 401000 |
|
}, |
|
{ |
|
"epoch": 2.06527609886577, |
|
"grad_norm": 2.27602481842041, |
|
"learning_rate": 6.047010101010101e-05, |
|
"loss": 1.6085, |
|
"step": 401500 |
|
}, |
|
{ |
|
"epoch": 2.06784804917569, |
|
"grad_norm": 2.100888252258301, |
|
"learning_rate": 6.041959595959596e-05, |
|
"loss": 1.6074, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 2.07041999948561, |
|
"grad_norm": 2.656245708465576, |
|
"learning_rate": 6.036909090909091e-05, |
|
"loss": 1.6188, |
|
"step": 402500 |
|
}, |
|
{ |
|
"epoch": 2.07299194979553, |
|
"grad_norm": 2.497401237487793, |
|
"learning_rate": 6.031858585858586e-05, |
|
"loss": 1.5999, |
|
"step": 403000 |
|
}, |
|
{ |
|
"epoch": 2.07556390010545, |
|
"grad_norm": 2.740108013153076, |
|
"learning_rate": 6.026808080808081e-05, |
|
"loss": 1.6138, |
|
"step": 403500 |
|
}, |
|
{ |
|
"epoch": 2.07813585041537, |
|
"grad_norm": 2.2161812782287598, |
|
"learning_rate": 6.021757575757576e-05, |
|
"loss": 1.5904, |
|
"step": 404000 |
|
}, |
|
{ |
|
"epoch": 2.08070780072529, |
|
"grad_norm": 2.5596768856048584, |
|
"learning_rate": 6.016707070707071e-05, |
|
"loss": 1.5991, |
|
"step": 404500 |
|
}, |
|
{ |
|
"epoch": 2.08327975103521, |
|
"grad_norm": 2.474024772644043, |
|
"learning_rate": 6.011656565656566e-05, |
|
"loss": 1.6169, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 2.08585170134513, |
|
"grad_norm": 2.562389373779297, |
|
"learning_rate": 6.0066060606060606e-05, |
|
"loss": 1.5961, |
|
"step": 405500 |
|
}, |
|
{ |
|
"epoch": 2.08842365165505, |
|
"grad_norm": 2.165395498275757, |
|
"learning_rate": 6.001555555555556e-05, |
|
"loss": 1.6032, |
|
"step": 406000 |
|
}, |
|
{ |
|
"epoch": 2.09099560196497, |
|
"grad_norm": 2.6308302879333496, |
|
"learning_rate": 5.996505050505051e-05, |
|
"loss": 1.6094, |
|
"step": 406500 |
|
}, |
|
{ |
|
"epoch": 2.09356755227489, |
|
"grad_norm": 2.065725564956665, |
|
"learning_rate": 5.9914646464646465e-05, |
|
"loss": 1.6033, |
|
"step": 407000 |
|
}, |
|
{ |
|
"epoch": 2.09613950258481, |
|
"grad_norm": 3.004451274871826, |
|
"learning_rate": 5.986414141414142e-05, |
|
"loss": 1.6011, |
|
"step": 407500 |
|
}, |
|
{ |
|
"epoch": 2.09871145289473, |
|
"grad_norm": 2.6577351093292236, |
|
"learning_rate": 5.981363636363637e-05, |
|
"loss": 1.5985, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 2.10128340320465, |
|
"grad_norm": 2.4974942207336426, |
|
"learning_rate": 5.976313131313132e-05, |
|
"loss": 1.6029, |
|
"step": 408500 |
|
}, |
|
{ |
|
"epoch": 2.10385535351457, |
|
"grad_norm": 2.7885189056396484, |
|
"learning_rate": 5.971262626262627e-05, |
|
"loss": 1.6035, |
|
"step": 409000 |
|
}, |
|
{ |
|
"epoch": 2.10642730382449, |
|
"grad_norm": 2.323251485824585, |
|
"learning_rate": 5.966212121212121e-05, |
|
"loss": 1.6054, |
|
"step": 409500 |
|
}, |
|
{ |
|
"epoch": 2.1089992541344103, |
|
"grad_norm": 2.666215658187866, |
|
"learning_rate": 5.961171717171718e-05, |
|
"loss": 1.5987, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 2.11157120444433, |
|
"grad_norm": 2.597623586654663, |
|
"learning_rate": 5.956121212121213e-05, |
|
"loss": 1.606, |
|
"step": 410500 |
|
}, |
|
{ |
|
"epoch": 2.11414315475425, |
|
"grad_norm": 1.9947013854980469, |
|
"learning_rate": 5.951070707070707e-05, |
|
"loss": 1.5954, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 2.11671510506417, |
|
"grad_norm": 2.544792652130127, |
|
"learning_rate": 5.946020202020203e-05, |
|
"loss": 1.5898, |
|
"step": 411500 |
|
}, |
|
{ |
|
"epoch": 2.11928705537409, |
|
"grad_norm": 2.5514931678771973, |
|
"learning_rate": 5.940979797979799e-05, |
|
"loss": 1.5974, |
|
"step": 412000 |
|
}, |
|
{ |
|
"epoch": 2.1218590056840103, |
|
"grad_norm": 2.448437213897705, |
|
"learning_rate": 5.935929292929293e-05, |
|
"loss": 1.6039, |
|
"step": 412500 |
|
}, |
|
{ |
|
"epoch": 2.12443095599393, |
|
"grad_norm": 2.7591707706451416, |
|
"learning_rate": 5.930878787878789e-05, |
|
"loss": 1.604, |
|
"step": 413000 |
|
}, |
|
{ |
|
"epoch": 2.1270029063038502, |
|
"grad_norm": 2.3299643993377686, |
|
"learning_rate": 5.925828282828283e-05, |
|
"loss": 1.6002, |
|
"step": 413500 |
|
}, |
|
{ |
|
"epoch": 2.12957485661377, |
|
"grad_norm": 2.2050669193267822, |
|
"learning_rate": 5.920777777777778e-05, |
|
"loss": 1.6087, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 2.13214680692369, |
|
"grad_norm": 2.514944314956665, |
|
"learning_rate": 5.915727272727273e-05, |
|
"loss": 1.5965, |
|
"step": 414500 |
|
}, |
|
{ |
|
"epoch": 2.1347187572336104, |
|
"grad_norm": 2.3953447341918945, |
|
"learning_rate": 5.910676767676768e-05, |
|
"loss": 1.598, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 2.13729070754353, |
|
"grad_norm": 2.2718632221221924, |
|
"learning_rate": 5.905626262626263e-05, |
|
"loss": 1.5952, |
|
"step": 415500 |
|
}, |
|
{ |
|
"epoch": 2.1398626578534503, |
|
"grad_norm": 2.559480905532837, |
|
"learning_rate": 5.900585858585859e-05, |
|
"loss": 1.5933, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 2.14243460816337, |
|
"grad_norm": 2.7121787071228027, |
|
"learning_rate": 5.895535353535354e-05, |
|
"loss": 1.6007, |
|
"step": 416500 |
|
}, |
|
{ |
|
"epoch": 2.1450065584732902, |
|
"grad_norm": 3.180011510848999, |
|
"learning_rate": 5.8904848484848486e-05, |
|
"loss": 1.5951, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 2.1475785087832104, |
|
"grad_norm": 3.01538348197937, |
|
"learning_rate": 5.885434343434344e-05, |
|
"loss": 1.6045, |
|
"step": 417500 |
|
}, |
|
{ |
|
"epoch": 2.15015045909313, |
|
"grad_norm": 2.491154670715332, |
|
"learning_rate": 5.8803838383838386e-05, |
|
"loss": 1.6071, |
|
"step": 418000 |
|
}, |
|
{ |
|
"epoch": 2.1527224094030504, |
|
"grad_norm": 2.4242184162139893, |
|
"learning_rate": 5.875333333333334e-05, |
|
"loss": 1.5957, |
|
"step": 418500 |
|
}, |
|
{ |
|
"epoch": 2.15529435971297, |
|
"grad_norm": 2.3193559646606445, |
|
"learning_rate": 5.87029292929293e-05, |
|
"loss": 1.6033, |
|
"step": 419000 |
|
}, |
|
{ |
|
"epoch": 2.1578663100228903, |
|
"grad_norm": 2.1788597106933594, |
|
"learning_rate": 5.8652424242424245e-05, |
|
"loss": 1.5927, |
|
"step": 419500 |
|
}, |
|
{ |
|
"epoch": 2.1604382603328105, |
|
"grad_norm": 2.646376371383667, |
|
"learning_rate": 5.86019191919192e-05, |
|
"loss": 1.5895, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 2.1630102106427302, |
|
"grad_norm": 2.4380106925964355, |
|
"learning_rate": 5.855141414141414e-05, |
|
"loss": 1.5864, |
|
"step": 420500 |
|
}, |
|
{ |
|
"epoch": 2.1655821609526504, |
|
"grad_norm": 2.479421377182007, |
|
"learning_rate": 5.85009090909091e-05, |
|
"loss": 1.5964, |
|
"step": 421000 |
|
}, |
|
{ |
|
"epoch": 2.16815411126257, |
|
"grad_norm": 2.3349339962005615, |
|
"learning_rate": 5.845040404040404e-05, |
|
"loss": 1.597, |
|
"step": 421500 |
|
}, |
|
{ |
|
"epoch": 2.1707260615724904, |
|
"grad_norm": 2.4106128215789795, |
|
"learning_rate": 5.8399999999999997e-05, |
|
"loss": 1.5902, |
|
"step": 422000 |
|
}, |
|
{ |
|
"epoch": 2.1732980118824106, |
|
"grad_norm": 2.562054395675659, |
|
"learning_rate": 5.834959595959596e-05, |
|
"loss": 1.5963, |
|
"step": 422500 |
|
}, |
|
{ |
|
"epoch": 2.1758699621923303, |
|
"grad_norm": 2.206015110015869, |
|
"learning_rate": 5.8299090909090916e-05, |
|
"loss": 1.5993, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 2.1784419125022505, |
|
"grad_norm": 2.554619550704956, |
|
"learning_rate": 5.8248585858585856e-05, |
|
"loss": 1.6091, |
|
"step": 423500 |
|
}, |
|
{ |
|
"epoch": 2.1810138628121702, |
|
"grad_norm": 2.2453134059906006, |
|
"learning_rate": 5.8198080808080815e-05, |
|
"loss": 1.5852, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 2.1835858131220904, |
|
"grad_norm": 2.3707222938537598, |
|
"learning_rate": 5.8147575757575755e-05, |
|
"loss": 1.587, |
|
"step": 424500 |
|
}, |
|
{ |
|
"epoch": 2.1861577634320106, |
|
"grad_norm": 2.2257208824157715, |
|
"learning_rate": 5.809707070707071e-05, |
|
"loss": 1.5821, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 2.1887297137419304, |
|
"grad_norm": 2.582345724105835, |
|
"learning_rate": 5.8046666666666674e-05, |
|
"loss": 1.5937, |
|
"step": 425500 |
|
}, |
|
{ |
|
"epoch": 2.1913016640518506, |
|
"grad_norm": 2.2276124954223633, |
|
"learning_rate": 5.7996161616161614e-05, |
|
"loss": 1.5982, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 2.1938736143617703, |
|
"grad_norm": 2.5953102111816406, |
|
"learning_rate": 5.794565656565657e-05, |
|
"loss": 1.6, |
|
"step": 426500 |
|
}, |
|
{ |
|
"epoch": 2.1964455646716905, |
|
"grad_norm": 2.059342861175537, |
|
"learning_rate": 5.7895151515151514e-05, |
|
"loss": 1.6002, |
|
"step": 427000 |
|
}, |
|
{ |
|
"epoch": 2.1990175149816107, |
|
"grad_norm": 2.5329113006591797, |
|
"learning_rate": 5.784464646464647e-05, |
|
"loss": 1.5795, |
|
"step": 427500 |
|
}, |
|
{ |
|
"epoch": 2.2015894652915304, |
|
"grad_norm": 2.672567844390869, |
|
"learning_rate": 5.779414141414141e-05, |
|
"loss": 1.5899, |
|
"step": 428000 |
|
}, |
|
{ |
|
"epoch": 2.2041614156014506, |
|
"grad_norm": 2.0910274982452393, |
|
"learning_rate": 5.7743636363636366e-05, |
|
"loss": 1.5848, |
|
"step": 428500 |
|
}, |
|
{ |
|
"epoch": 2.2067333659113704, |
|
"grad_norm": 2.369044542312622, |
|
"learning_rate": 5.769313131313132e-05, |
|
"loss": 1.5936, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 2.2093053162212906, |
|
"grad_norm": 2.7465758323669434, |
|
"learning_rate": 5.7642626262626266e-05, |
|
"loss": 1.5933, |
|
"step": 429500 |
|
}, |
|
{ |
|
"epoch": 2.2118772665312108, |
|
"grad_norm": 2.3471922874450684, |
|
"learning_rate": 5.7592222222222225e-05, |
|
"loss": 1.5846, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 2.2144492168411305, |
|
"grad_norm": 2.5954208374023438, |
|
"learning_rate": 5.754171717171718e-05, |
|
"loss": 1.5892, |
|
"step": 430500 |
|
}, |
|
{ |
|
"epoch": 2.2170211671510507, |
|
"grad_norm": 2.122445583343506, |
|
"learning_rate": 5.7491212121212125e-05, |
|
"loss": 1.5951, |
|
"step": 431000 |
|
}, |
|
{ |
|
"epoch": 2.2195931174609704, |
|
"grad_norm": 2.378053665161133, |
|
"learning_rate": 5.744070707070708e-05, |
|
"loss": 1.595, |
|
"step": 431500 |
|
}, |
|
{ |
|
"epoch": 2.2221650677708906, |
|
"grad_norm": 3.016186475753784, |
|
"learning_rate": 5.7390202020202024e-05, |
|
"loss": 1.5805, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 2.224737018080811, |
|
"grad_norm": 2.2016313076019287, |
|
"learning_rate": 5.7339797979797984e-05, |
|
"loss": 1.5976, |
|
"step": 432500 |
|
}, |
|
{ |
|
"epoch": 2.2273089683907306, |
|
"grad_norm": 2.296274423599243, |
|
"learning_rate": 5.728929292929294e-05, |
|
"loss": 1.5844, |
|
"step": 433000 |
|
}, |
|
{ |
|
"epoch": 2.2298809187006507, |
|
"grad_norm": 2.5509867668151855, |
|
"learning_rate": 5.723878787878788e-05, |
|
"loss": 1.5907, |
|
"step": 433500 |
|
}, |
|
{ |
|
"epoch": 2.2324528690105705, |
|
"grad_norm": 2.5408694744110107, |
|
"learning_rate": 5.7188282828282836e-05, |
|
"loss": 1.6015, |
|
"step": 434000 |
|
}, |
|
{ |
|
"epoch": 2.2350248193204907, |
|
"grad_norm": 2.5384156703948975, |
|
"learning_rate": 5.7137878787878796e-05, |
|
"loss": 1.602, |
|
"step": 434500 |
|
}, |
|
{ |
|
"epoch": 2.237596769630411, |
|
"grad_norm": 2.3616080284118652, |
|
"learning_rate": 5.708737373737374e-05, |
|
"loss": 1.5998, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 2.2401687199403306, |
|
"grad_norm": 2.7889325618743896, |
|
"learning_rate": 5.7036868686868695e-05, |
|
"loss": 1.5842, |
|
"step": 435500 |
|
}, |
|
{ |
|
"epoch": 2.242740670250251, |
|
"grad_norm": 2.3167500495910645, |
|
"learning_rate": 5.6986363636363635e-05, |
|
"loss": 1.5897, |
|
"step": 436000 |
|
}, |
|
{ |
|
"epoch": 2.2453126205601706, |
|
"grad_norm": 2.556781053543091, |
|
"learning_rate": 5.6935858585858595e-05, |
|
"loss": 1.5807, |
|
"step": 436500 |
|
}, |
|
{ |
|
"epoch": 2.2478845708700907, |
|
"grad_norm": 2.290909767150879, |
|
"learning_rate": 5.6885353535353534e-05, |
|
"loss": 1.5765, |
|
"step": 437000 |
|
}, |
|
{ |
|
"epoch": 2.250456521180011, |
|
"grad_norm": 2.239105224609375, |
|
"learning_rate": 5.6834949494949494e-05, |
|
"loss": 1.5916, |
|
"step": 437500 |
|
}, |
|
{ |
|
"epoch": 2.2530284714899307, |
|
"grad_norm": 2.7574894428253174, |
|
"learning_rate": 5.6784444444444454e-05, |
|
"loss": 1.5915, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 2.255600421799851, |
|
"grad_norm": 2.2202274799346924, |
|
"learning_rate": 5.673393939393939e-05, |
|
"loss": 1.5921, |
|
"step": 438500 |
|
}, |
|
{ |
|
"epoch": 2.2581723721097706, |
|
"grad_norm": 2.6853768825531006, |
|
"learning_rate": 5.6683434343434346e-05, |
|
"loss": 1.5815, |
|
"step": 439000 |
|
}, |
|
{ |
|
"epoch": 2.260744322419691, |
|
"grad_norm": 2.2511544227600098, |
|
"learning_rate": 5.663292929292929e-05, |
|
"loss": 1.5858, |
|
"step": 439500 |
|
}, |
|
{ |
|
"epoch": 2.263316272729611, |
|
"grad_norm": 2.5201659202575684, |
|
"learning_rate": 5.6582424242424246e-05, |
|
"loss": 1.577, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 2.2658882230395307, |
|
"grad_norm": 2.3538320064544678, |
|
"learning_rate": 5.6532020202020206e-05, |
|
"loss": 1.5781, |
|
"step": 440500 |
|
}, |
|
{ |
|
"epoch": 2.268460173349451, |
|
"grad_norm": 2.352900981903076, |
|
"learning_rate": 5.648151515151515e-05, |
|
"loss": 1.5677, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 2.2710321236593707, |
|
"grad_norm": 2.8098092079162598, |
|
"learning_rate": 5.6431010101010105e-05, |
|
"loss": 1.5754, |
|
"step": 441500 |
|
}, |
|
{ |
|
"epoch": 2.273604073969291, |
|
"grad_norm": 2.5628156661987305, |
|
"learning_rate": 5.638050505050505e-05, |
|
"loss": 1.5882, |
|
"step": 442000 |
|
}, |
|
{ |
|
"epoch": 2.276176024279211, |
|
"grad_norm": 2.2846975326538086, |
|
"learning_rate": 5.633010101010101e-05, |
|
"loss": 1.5868, |
|
"step": 442500 |
|
}, |
|
{ |
|
"epoch": 2.278747974589131, |
|
"grad_norm": 2.268409013748169, |
|
"learning_rate": 5.6279595959595964e-05, |
|
"loss": 1.5823, |
|
"step": 443000 |
|
}, |
|
{ |
|
"epoch": 2.281319924899051, |
|
"grad_norm": 2.092773914337158, |
|
"learning_rate": 5.622909090909091e-05, |
|
"loss": 1.5884, |
|
"step": 443500 |
|
}, |
|
{ |
|
"epoch": 2.2838918752089707, |
|
"grad_norm": 2.2289109230041504, |
|
"learning_rate": 5.6178585858585863e-05, |
|
"loss": 1.5933, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 2.286463825518891, |
|
"grad_norm": 2.1926701068878174, |
|
"learning_rate": 5.612808080808081e-05, |
|
"loss": 1.5816, |
|
"step": 444500 |
|
}, |
|
{ |
|
"epoch": 2.289035775828811, |
|
"grad_norm": 2.5182721614837646, |
|
"learning_rate": 5.607757575757576e-05, |
|
"loss": 1.5973, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 2.291607726138731, |
|
"grad_norm": 2.6437392234802246, |
|
"learning_rate": 5.602717171717172e-05, |
|
"loss": 1.5841, |
|
"step": 445500 |
|
}, |
|
{ |
|
"epoch": 2.294179676448651, |
|
"grad_norm": 2.7058298587799072, |
|
"learning_rate": 5.597666666666667e-05, |
|
"loss": 1.5831, |
|
"step": 446000 |
|
}, |
|
{ |
|
"epoch": 2.296751626758571, |
|
"grad_norm": 2.0953357219696045, |
|
"learning_rate": 5.592616161616162e-05, |
|
"loss": 1.5918, |
|
"step": 446500 |
|
}, |
|
{ |
|
"epoch": 2.299323577068491, |
|
"grad_norm": 2.297541618347168, |
|
"learning_rate": 5.587565656565656e-05, |
|
"loss": 1.5666, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 2.301895527378411, |
|
"grad_norm": 2.4610650539398193, |
|
"learning_rate": 5.582525252525253e-05, |
|
"loss": 1.5804, |
|
"step": 447500 |
|
}, |
|
{ |
|
"epoch": 2.304467477688331, |
|
"grad_norm": 2.629695415496826, |
|
"learning_rate": 5.577474747474748e-05, |
|
"loss": 1.5843, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 2.307039427998251, |
|
"grad_norm": 2.474860906600952, |
|
"learning_rate": 5.572424242424242e-05, |
|
"loss": 1.5928, |
|
"step": 448500 |
|
}, |
|
{ |
|
"epoch": 2.309611378308171, |
|
"grad_norm": 2.8906733989715576, |
|
"learning_rate": 5.567373737373738e-05, |
|
"loss": 1.5825, |
|
"step": 449000 |
|
}, |
|
{ |
|
"epoch": 2.312183328618091, |
|
"grad_norm": 2.610053062438965, |
|
"learning_rate": 5.562323232323232e-05, |
|
"loss": 1.5864, |
|
"step": 449500 |
|
}, |
|
{ |
|
"epoch": 2.3147552789280113, |
|
"grad_norm": 2.2027618885040283, |
|
"learning_rate": 5.557282828282828e-05, |
|
"loss": 1.5657, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 2.317327229237931, |
|
"grad_norm": 2.362893581390381, |
|
"learning_rate": 5.552232323232324e-05, |
|
"loss": 1.5803, |
|
"step": 450500 |
|
}, |
|
{ |
|
"epoch": 2.319899179547851, |
|
"grad_norm": 3.065056800842285, |
|
"learning_rate": 5.547181818181818e-05, |
|
"loss": 1.5745, |
|
"step": 451000 |
|
}, |
|
{ |
|
"epoch": 2.322471129857771, |
|
"grad_norm": 2.644787311553955, |
|
"learning_rate": 5.542131313131313e-05, |
|
"loss": 1.5805, |
|
"step": 451500 |
|
}, |
|
{ |
|
"epoch": 2.325043080167691, |
|
"grad_norm": 2.324190855026245, |
|
"learning_rate": 5.537080808080808e-05, |
|
"loss": 1.5782, |
|
"step": 452000 |
|
}, |
|
{ |
|
"epoch": 2.3276150304776113, |
|
"grad_norm": 2.8596031665802, |
|
"learning_rate": 5.532030303030303e-05, |
|
"loss": 1.5731, |
|
"step": 452500 |
|
}, |
|
{ |
|
"epoch": 2.330186980787531, |
|
"grad_norm": 2.6860458850860596, |
|
"learning_rate": 5.526979797979798e-05, |
|
"loss": 1.5761, |
|
"step": 453000 |
|
}, |
|
{ |
|
"epoch": 2.3327589310974512, |
|
"grad_norm": 2.1039023399353027, |
|
"learning_rate": 5.521929292929293e-05, |
|
"loss": 1.5773, |
|
"step": 453500 |
|
}, |
|
{ |
|
"epoch": 2.335330881407371, |
|
"grad_norm": 2.399176836013794, |
|
"learning_rate": 5.516888888888889e-05, |
|
"loss": 1.5705, |
|
"step": 454000 |
|
}, |
|
{ |
|
"epoch": 2.337902831717291, |
|
"grad_norm": 2.207998514175415, |
|
"learning_rate": 5.511838383838384e-05, |
|
"loss": 1.5846, |
|
"step": 454500 |
|
}, |
|
{ |
|
"epoch": 2.3404747820272114, |
|
"grad_norm": 2.3117659091949463, |
|
"learning_rate": 5.50679797979798e-05, |
|
"loss": 1.5773, |
|
"step": 455000 |
|
}, |
|
{ |
|
"epoch": 2.343046732337131, |
|
"grad_norm": 2.4075472354888916, |
|
"learning_rate": 5.501747474747475e-05, |
|
"loss": 1.5747, |
|
"step": 455500 |
|
}, |
|
{ |
|
"epoch": 2.3456186826470513, |
|
"grad_norm": 2.715557813644409, |
|
"learning_rate": 5.4966969696969696e-05, |
|
"loss": 1.5734, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 2.348190632956971, |
|
"grad_norm": 2.486280679702759, |
|
"learning_rate": 5.491646464646465e-05, |
|
"loss": 1.5764, |
|
"step": 456500 |
|
}, |
|
{ |
|
"epoch": 2.3507625832668912, |
|
"grad_norm": 2.090132713317871, |
|
"learning_rate": 5.486606060606061e-05, |
|
"loss": 1.5649, |
|
"step": 457000 |
|
}, |
|
{ |
|
"epoch": 2.3533345335768114, |
|
"grad_norm": 3.2762579917907715, |
|
"learning_rate": 5.4815555555555555e-05, |
|
"loss": 1.5744, |
|
"step": 457500 |
|
}, |
|
{ |
|
"epoch": 2.355906483886731, |
|
"grad_norm": 2.641038179397583, |
|
"learning_rate": 5.4765151515151515e-05, |
|
"loss": 1.5807, |
|
"step": 458000 |
|
}, |
|
{ |
|
"epoch": 2.3584784341966514, |
|
"grad_norm": 2.685852527618408, |
|
"learning_rate": 5.471464646464647e-05, |
|
"loss": 1.5747, |
|
"step": 458500 |
|
}, |
|
{ |
|
"epoch": 2.361050384506571, |
|
"grad_norm": 3.033771514892578, |
|
"learning_rate": 5.4664141414141414e-05, |
|
"loss": 1.5735, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 2.3636223348164913, |
|
"grad_norm": 2.240175485610962, |
|
"learning_rate": 5.461363636363637e-05, |
|
"loss": 1.5664, |
|
"step": 459500 |
|
}, |
|
{ |
|
"epoch": 2.3661942851264115, |
|
"grad_norm": 2.1413381099700928, |
|
"learning_rate": 5.4563131313131314e-05, |
|
"loss": 1.5739, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 2.3687662354363312, |
|
"grad_norm": 2.455625295639038, |
|
"learning_rate": 5.451262626262627e-05, |
|
"loss": 1.5737, |
|
"step": 460500 |
|
}, |
|
{ |
|
"epoch": 2.3713381857462514, |
|
"grad_norm": 2.3633012771606445, |
|
"learning_rate": 5.446212121212122e-05, |
|
"loss": 1.5685, |
|
"step": 461000 |
|
}, |
|
{ |
|
"epoch": 2.373910136056171, |
|
"grad_norm": 2.4887959957122803, |
|
"learning_rate": 5.4411616161616166e-05, |
|
"loss": 1.5691, |
|
"step": 461500 |
|
}, |
|
{ |
|
"epoch": 2.3764820863660914, |
|
"grad_norm": 2.6525588035583496, |
|
"learning_rate": 5.436111111111112e-05, |
|
"loss": 1.5663, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 2.3790540366760116, |
|
"grad_norm": 2.4766228199005127, |
|
"learning_rate": 5.431070707070708e-05, |
|
"loss": 1.5682, |
|
"step": 462500 |
|
}, |
|
{ |
|
"epoch": 2.3816259869859313, |
|
"grad_norm": 2.230529308319092, |
|
"learning_rate": 5.4260202020202025e-05, |
|
"loss": 1.5752, |
|
"step": 463000 |
|
}, |
|
{ |
|
"epoch": 2.3841979372958515, |
|
"grad_norm": 2.414194345474243, |
|
"learning_rate": 5.420969696969698e-05, |
|
"loss": 1.572, |
|
"step": 463500 |
|
}, |
|
{ |
|
"epoch": 2.3867698876057712, |
|
"grad_norm": 2.442136287689209, |
|
"learning_rate": 5.415919191919192e-05, |
|
"loss": 1.5765, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 2.3893418379156914, |
|
"grad_norm": 2.0765578746795654, |
|
"learning_rate": 5.4108787878787884e-05, |
|
"loss": 1.5822, |
|
"step": 464500 |
|
}, |
|
{ |
|
"epoch": 2.3919137882256116, |
|
"grad_norm": 2.4134793281555176, |
|
"learning_rate": 5.405828282828284e-05, |
|
"loss": 1.5652, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 2.3944857385355314, |
|
"grad_norm": 2.300403356552124, |
|
"learning_rate": 5.400777777777778e-05, |
|
"loss": 1.5599, |
|
"step": 465500 |
|
}, |
|
{ |
|
"epoch": 2.3970576888454516, |
|
"grad_norm": 2.1540491580963135, |
|
"learning_rate": 5.395727272727274e-05, |
|
"loss": 1.5634, |
|
"step": 466000 |
|
}, |
|
{ |
|
"epoch": 2.3996296391553713, |
|
"grad_norm": 2.8791420459747314, |
|
"learning_rate": 5.3906767676767676e-05, |
|
"loss": 1.5695, |
|
"step": 466500 |
|
}, |
|
{ |
|
"epoch": 2.4022015894652915, |
|
"grad_norm": 2.2609245777130127, |
|
"learning_rate": 5.3856363636363636e-05, |
|
"loss": 1.5726, |
|
"step": 467000 |
|
}, |
|
{ |
|
"epoch": 2.4047735397752117, |
|
"grad_norm": 2.4185187816619873, |
|
"learning_rate": 5.3805858585858596e-05, |
|
"loss": 1.5764, |
|
"step": 467500 |
|
}, |
|
{ |
|
"epoch": 2.4073454900851314, |
|
"grad_norm": 2.195435047149658, |
|
"learning_rate": 5.3755353535353536e-05, |
|
"loss": 1.5616, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 2.4099174403950516, |
|
"grad_norm": 2.378612756729126, |
|
"learning_rate": 5.370484848484849e-05, |
|
"loss": 1.5636, |
|
"step": 468500 |
|
}, |
|
{ |
|
"epoch": 2.4124893907049714, |
|
"grad_norm": 2.3817667961120605, |
|
"learning_rate": 5.3654343434343435e-05, |
|
"loss": 1.5682, |
|
"step": 469000 |
|
}, |
|
{ |
|
"epoch": 2.4150613410148916, |
|
"grad_norm": 2.7806594371795654, |
|
"learning_rate": 5.360383838383839e-05, |
|
"loss": 1.5611, |
|
"step": 469500 |
|
}, |
|
{ |
|
"epoch": 2.4176332913248118, |
|
"grad_norm": 2.0810320377349854, |
|
"learning_rate": 5.3553333333333334e-05, |
|
"loss": 1.5717, |
|
"step": 470000 |
|
}, |
|
{ |
|
"epoch": 2.4202052416347315, |
|
"grad_norm": 2.4072470664978027, |
|
"learning_rate": 5.350282828282829e-05, |
|
"loss": 1.5594, |
|
"step": 470500 |
|
}, |
|
{ |
|
"epoch": 2.4227771919446517, |
|
"grad_norm": 2.347970485687256, |
|
"learning_rate": 5.345242424242425e-05, |
|
"loss": 1.5619, |
|
"step": 471000 |
|
}, |
|
{ |
|
"epoch": 2.4253491422545714, |
|
"grad_norm": 2.6435277462005615, |
|
"learning_rate": 5.3401919191919193e-05, |
|
"loss": 1.5848, |
|
"step": 471500 |
|
}, |
|
{ |
|
"epoch": 2.4279210925644916, |
|
"grad_norm": 2.3187005519866943, |
|
"learning_rate": 5.335141414141415e-05, |
|
"loss": 1.565, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 2.430493042874412, |
|
"grad_norm": 2.5662784576416016, |
|
"learning_rate": 5.3301010101010106e-05, |
|
"loss": 1.5764, |
|
"step": 472500 |
|
}, |
|
{ |
|
"epoch": 2.4330649931843316, |
|
"grad_norm": 2.5049164295196533, |
|
"learning_rate": 5.325050505050505e-05, |
|
"loss": 1.5577, |
|
"step": 473000 |
|
}, |
|
{ |
|
"epoch": 2.4356369434942517, |
|
"grad_norm": 2.5086004734039307, |
|
"learning_rate": 5.3200000000000006e-05, |
|
"loss": 1.5622, |
|
"step": 473500 |
|
}, |
|
{ |
|
"epoch": 2.4382088938041715, |
|
"grad_norm": 2.5472593307495117, |
|
"learning_rate": 5.3149595959595965e-05, |
|
"loss": 1.5525, |
|
"step": 474000 |
|
}, |
|
{ |
|
"epoch": 2.4407808441140917, |
|
"grad_norm": 2.441056966781616, |
|
"learning_rate": 5.309909090909091e-05, |
|
"loss": 1.574, |
|
"step": 474500 |
|
}, |
|
{ |
|
"epoch": 2.443352794424012, |
|
"grad_norm": 2.6029136180877686, |
|
"learning_rate": 5.3048585858585865e-05, |
|
"loss": 1.5704, |
|
"step": 475000 |
|
}, |
|
{ |
|
"epoch": 2.4459247447339316, |
|
"grad_norm": 2.321699857711792, |
|
"learning_rate": 5.299808080808081e-05, |
|
"loss": 1.5549, |
|
"step": 475500 |
|
}, |
|
{ |
|
"epoch": 2.448496695043852, |
|
"grad_norm": 2.694145441055298, |
|
"learning_rate": 5.2947575757575764e-05, |
|
"loss": 1.5603, |
|
"step": 476000 |
|
}, |
|
{ |
|
"epoch": 2.4510686453537716, |
|
"grad_norm": 2.952949047088623, |
|
"learning_rate": 5.2897070707070704e-05, |
|
"loss": 1.5659, |
|
"step": 476500 |
|
}, |
|
{ |
|
"epoch": 2.4536405956636917, |
|
"grad_norm": 2.3803412914276123, |
|
"learning_rate": 5.2846565656565664e-05, |
|
"loss": 1.5602, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 2.456212545973612, |
|
"grad_norm": 2.4755702018737793, |
|
"learning_rate": 5.27960606060606e-05, |
|
"loss": 1.5719, |
|
"step": 477500 |
|
}, |
|
{ |
|
"epoch": 2.4587844962835317, |
|
"grad_norm": 2.4618046283721924, |
|
"learning_rate": 5.2745555555555556e-05, |
|
"loss": 1.5675, |
|
"step": 478000 |
|
}, |
|
{ |
|
"epoch": 2.461356446593452, |
|
"grad_norm": 2.186459541320801, |
|
"learning_rate": 5.26950505050505e-05, |
|
"loss": 1.5638, |
|
"step": 478500 |
|
}, |
|
{ |
|
"epoch": 2.4639283969033716, |
|
"grad_norm": 2.701554298400879, |
|
"learning_rate": 5.2644545454545456e-05, |
|
"loss": 1.5633, |
|
"step": 479000 |
|
}, |
|
{ |
|
"epoch": 2.466500347213292, |
|
"grad_norm": 2.445854902267456, |
|
"learning_rate": 5.25940404040404e-05, |
|
"loss": 1.5589, |
|
"step": 479500 |
|
}, |
|
{ |
|
"epoch": 2.469072297523212, |
|
"grad_norm": 2.387634515762329, |
|
"learning_rate": 5.254373737373738e-05, |
|
"loss": 1.5658, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 2.4716442478331317, |
|
"grad_norm": 2.4959769248962402, |
|
"learning_rate": 5.249323232323232e-05, |
|
"loss": 1.5637, |
|
"step": 480500 |
|
}, |
|
{ |
|
"epoch": 2.474216198143052, |
|
"grad_norm": 2.722851276397705, |
|
"learning_rate": 5.2442727272727274e-05, |
|
"loss": 1.5659, |
|
"step": 481000 |
|
}, |
|
{ |
|
"epoch": 2.4767881484529717, |
|
"grad_norm": 2.4769365787506104, |
|
"learning_rate": 5.239222222222222e-05, |
|
"loss": 1.5599, |
|
"step": 481500 |
|
}, |
|
{ |
|
"epoch": 2.479360098762892, |
|
"grad_norm": 2.57315993309021, |
|
"learning_rate": 5.2341717171717174e-05, |
|
"loss": 1.5583, |
|
"step": 482000 |
|
}, |
|
{ |
|
"epoch": 2.481932049072812, |
|
"grad_norm": 2.319643974304199, |
|
"learning_rate": 5.229121212121212e-05, |
|
"loss": 1.5598, |
|
"step": 482500 |
|
}, |
|
{ |
|
"epoch": 2.484503999382732, |
|
"grad_norm": 2.470033645629883, |
|
"learning_rate": 5.224080808080808e-05, |
|
"loss": 1.5547, |
|
"step": 483000 |
|
}, |
|
{ |
|
"epoch": 2.487075949692652, |
|
"grad_norm": 3.001162052154541, |
|
"learning_rate": 5.219030303030303e-05, |
|
"loss": 1.5655, |
|
"step": 483500 |
|
}, |
|
{ |
|
"epoch": 2.4896479000025717, |
|
"grad_norm": 2.486762523651123, |
|
"learning_rate": 5.213979797979798e-05, |
|
"loss": 1.5738, |
|
"step": 484000 |
|
}, |
|
{ |
|
"epoch": 2.492219850312492, |
|
"grad_norm": 2.6207542419433594, |
|
"learning_rate": 5.208939393939394e-05, |
|
"loss": 1.5562, |
|
"step": 484500 |
|
}, |
|
{ |
|
"epoch": 2.494791800622412, |
|
"grad_norm": 2.8983652591705322, |
|
"learning_rate": 5.203888888888889e-05, |
|
"loss": 1.5562, |
|
"step": 485000 |
|
}, |
|
{ |
|
"epoch": 2.497363750932332, |
|
"grad_norm": 2.157689332962036, |
|
"learning_rate": 5.198838383838384e-05, |
|
"loss": 1.561, |
|
"step": 485500 |
|
}, |
|
{ |
|
"epoch": 2.499935701242252, |
|
"grad_norm": 2.469301462173462, |
|
"learning_rate": 5.193787878787879e-05, |
|
"loss": 1.5567, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 2.502507651552172, |
|
"grad_norm": 2.441870927810669, |
|
"learning_rate": 5.188747474747475e-05, |
|
"loss": 1.5579, |
|
"step": 486500 |
|
}, |
|
{ |
|
"epoch": 2.505079601862092, |
|
"grad_norm": 2.232508897781372, |
|
"learning_rate": 5.18369696969697e-05, |
|
"loss": 1.5521, |
|
"step": 487000 |
|
}, |
|
{ |
|
"epoch": 2.507651552172012, |
|
"grad_norm": 2.48417067527771, |
|
"learning_rate": 5.178646464646465e-05, |
|
"loss": 1.5602, |
|
"step": 487500 |
|
}, |
|
{ |
|
"epoch": 2.510223502481932, |
|
"grad_norm": 2.3687491416931152, |
|
"learning_rate": 5.173595959595959e-05, |
|
"loss": 1.5541, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 2.512795452791852, |
|
"grad_norm": 2.153627395629883, |
|
"learning_rate": 5.168545454545455e-05, |
|
"loss": 1.5581, |
|
"step": 488500 |
|
}, |
|
{ |
|
"epoch": 2.515367403101772, |
|
"grad_norm": 2.908628463745117, |
|
"learning_rate": 5.16349494949495e-05, |
|
"loss": 1.5644, |
|
"step": 489000 |
|
}, |
|
{ |
|
"epoch": 2.517939353411692, |
|
"grad_norm": 2.5632777214050293, |
|
"learning_rate": 5.158444444444445e-05, |
|
"loss": 1.5542, |
|
"step": 489500 |
|
}, |
|
{ |
|
"epoch": 2.5205113037216123, |
|
"grad_norm": 2.2820920944213867, |
|
"learning_rate": 5.15339393939394e-05, |
|
"loss": 1.5538, |
|
"step": 490000 |
|
}, |
|
{ |
|
"epoch": 2.523083254031532, |
|
"grad_norm": 2.4731087684631348, |
|
"learning_rate": 5.148343434343434e-05, |
|
"loss": 1.5454, |
|
"step": 490500 |
|
}, |
|
{ |
|
"epoch": 2.525655204341452, |
|
"grad_norm": 2.622070789337158, |
|
"learning_rate": 5.14329292929293e-05, |
|
"loss": 1.5595, |
|
"step": 491000 |
|
}, |
|
{ |
|
"epoch": 2.528227154651372, |
|
"grad_norm": 2.20470929145813, |
|
"learning_rate": 5.138242424242424e-05, |
|
"loss": 1.5518, |
|
"step": 491500 |
|
}, |
|
{ |
|
"epoch": 2.530799104961292, |
|
"grad_norm": 3.232024669647217, |
|
"learning_rate": 5.1331919191919195e-05, |
|
"loss": 1.5537, |
|
"step": 492000 |
|
}, |
|
{ |
|
"epoch": 2.5333710552712123, |
|
"grad_norm": 2.674577236175537, |
|
"learning_rate": 5.128151515151516e-05, |
|
"loss": 1.5556, |
|
"step": 492500 |
|
}, |
|
{ |
|
"epoch": 2.535943005581132, |
|
"grad_norm": 2.4473094940185547, |
|
"learning_rate": 5.12310101010101e-05, |
|
"loss": 1.5584, |
|
"step": 493000 |
|
}, |
|
{ |
|
"epoch": 2.5385149558910522, |
|
"grad_norm": 2.435515880584717, |
|
"learning_rate": 5.1180505050505054e-05, |
|
"loss": 1.5543, |
|
"step": 493500 |
|
}, |
|
{ |
|
"epoch": 2.541086906200972, |
|
"grad_norm": 2.112659454345703, |
|
"learning_rate": 5.113e-05, |
|
"loss": 1.5434, |
|
"step": 494000 |
|
}, |
|
{ |
|
"epoch": 2.543658856510892, |
|
"grad_norm": 2.5637118816375732, |
|
"learning_rate": 5.107949494949495e-05, |
|
"loss": 1.5566, |
|
"step": 494500 |
|
}, |
|
{ |
|
"epoch": 2.5462308068208124, |
|
"grad_norm": 2.8220012187957764, |
|
"learning_rate": 5.10289898989899e-05, |
|
"loss": 1.5556, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 2.548802757130732, |
|
"grad_norm": 2.318514108657837, |
|
"learning_rate": 5.097858585858586e-05, |
|
"loss": 1.5626, |
|
"step": 495500 |
|
}, |
|
{ |
|
"epoch": 2.5513747074406523, |
|
"grad_norm": 2.184453248977661, |
|
"learning_rate": 5.092808080808081e-05, |
|
"loss": 1.5428, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 2.553946657750572, |
|
"grad_norm": 2.3431742191314697, |
|
"learning_rate": 5.087757575757576e-05, |
|
"loss": 1.5507, |
|
"step": 496500 |
|
}, |
|
{ |
|
"epoch": 2.5565186080604922, |
|
"grad_norm": 2.6357996463775635, |
|
"learning_rate": 5.082707070707071e-05, |
|
"loss": 1.5588, |
|
"step": 497000 |
|
}, |
|
{ |
|
"epoch": 2.5590905583704124, |
|
"grad_norm": 2.3024609088897705, |
|
"learning_rate": 5.077656565656566e-05, |
|
"loss": 1.5406, |
|
"step": 497500 |
|
}, |
|
{ |
|
"epoch": 2.561662508680332, |
|
"grad_norm": 3.5537869930267334, |
|
"learning_rate": 5.072616161616162e-05, |
|
"loss": 1.5531, |
|
"step": 498000 |
|
}, |
|
{ |
|
"epoch": 2.5642344589902524, |
|
"grad_norm": 2.6683225631713867, |
|
"learning_rate": 5.067565656565657e-05, |
|
"loss": 1.5418, |
|
"step": 498500 |
|
}, |
|
{ |
|
"epoch": 2.566806409300172, |
|
"grad_norm": 2.3651461601257324, |
|
"learning_rate": 5.062525252525253e-05, |
|
"loss": 1.5483, |
|
"step": 499000 |
|
}, |
|
{ |
|
"epoch": 2.5693783596100923, |
|
"grad_norm": 2.525416374206543, |
|
"learning_rate": 5.0574747474747477e-05, |
|
"loss": 1.5602, |
|
"step": 499500 |
|
}, |
|
{ |
|
"epoch": 2.5719503099200125, |
|
"grad_norm": 2.435364007949829, |
|
"learning_rate": 5.052424242424243e-05, |
|
"loss": 1.5521, |
|
"step": 500000 |
|
}, |
|
{ |
|
"epoch": 2.5745222602299322, |
|
"grad_norm": 2.486356735229492, |
|
"learning_rate": 5.047383838383839e-05, |
|
"loss": 1.5585, |
|
"step": 500500 |
|
}, |
|
{ |
|
"epoch": 2.5770942105398524, |
|
"grad_norm": 2.385429859161377, |
|
"learning_rate": 5.0423333333333336e-05, |
|
"loss": 1.5457, |
|
"step": 501000 |
|
}, |
|
{ |
|
"epoch": 2.579666160849772, |
|
"grad_norm": 2.468360185623169, |
|
"learning_rate": 5.037282828282829e-05, |
|
"loss": 1.5565, |
|
"step": 501500 |
|
}, |
|
{ |
|
"epoch": 2.5822381111596924, |
|
"grad_norm": 2.0873448848724365, |
|
"learning_rate": 5.0322323232323235e-05, |
|
"loss": 1.5395, |
|
"step": 502000 |
|
}, |
|
{ |
|
"epoch": 2.5848100614696126, |
|
"grad_norm": 2.7715628147125244, |
|
"learning_rate": 5.027181818181819e-05, |
|
"loss": 1.5414, |
|
"step": 502500 |
|
}, |
|
{ |
|
"epoch": 2.5873820117795323, |
|
"grad_norm": 2.3114826679229736, |
|
"learning_rate": 5.022131313131313e-05, |
|
"loss": 1.5587, |
|
"step": 503000 |
|
}, |
|
{ |
|
"epoch": 2.5899539620894525, |
|
"grad_norm": 2.4721546173095703, |
|
"learning_rate": 5.017080808080809e-05, |
|
"loss": 1.548, |
|
"step": 503500 |
|
}, |
|
{ |
|
"epoch": 2.5925259123993722, |
|
"grad_norm": 2.3029587268829346, |
|
"learning_rate": 5.012030303030303e-05, |
|
"loss": 1.5513, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 2.5950978627092924, |
|
"grad_norm": 2.0909407138824463, |
|
"learning_rate": 5.006979797979798e-05, |
|
"loss": 1.5537, |
|
"step": 504500 |
|
}, |
|
{ |
|
"epoch": 2.5976698130192126, |
|
"grad_norm": 2.5967423915863037, |
|
"learning_rate": 5.001929292929293e-05, |
|
"loss": 1.5472, |
|
"step": 505000 |
|
}, |
|
{ |
|
"epoch": 2.6002417633291324, |
|
"grad_norm": 2.921551465988159, |
|
"learning_rate": 4.996878787878788e-05, |
|
"loss": 1.5311, |
|
"step": 505500 |
|
}, |
|
{ |
|
"epoch": 2.6028137136390526, |
|
"grad_norm": 2.4251134395599365, |
|
"learning_rate": 4.991828282828283e-05, |
|
"loss": 1.5411, |
|
"step": 506000 |
|
}, |
|
{ |
|
"epoch": 2.6053856639489723, |
|
"grad_norm": 2.736292600631714, |
|
"learning_rate": 4.986777777777778e-05, |
|
"loss": 1.5552, |
|
"step": 506500 |
|
}, |
|
{ |
|
"epoch": 2.6079576142588925, |
|
"grad_norm": 2.5091052055358887, |
|
"learning_rate": 4.9817272727272726e-05, |
|
"loss": 1.5535, |
|
"step": 507000 |
|
}, |
|
{ |
|
"epoch": 2.6105295645688127, |
|
"grad_norm": 2.42288875579834, |
|
"learning_rate": 4.976686868686869e-05, |
|
"loss": 1.552, |
|
"step": 507500 |
|
}, |
|
{ |
|
"epoch": 2.6131015148787324, |
|
"grad_norm": 2.5599241256713867, |
|
"learning_rate": 4.971646464646465e-05, |
|
"loss": 1.5447, |
|
"step": 508000 |
|
}, |
|
{ |
|
"epoch": 2.6156734651886526, |
|
"grad_norm": 2.5007565021514893, |
|
"learning_rate": 4.96659595959596e-05, |
|
"loss": 1.5493, |
|
"step": 508500 |
|
}, |
|
{ |
|
"epoch": 2.6182454154985724, |
|
"grad_norm": 2.389376401901245, |
|
"learning_rate": 4.961545454545455e-05, |
|
"loss": 1.5411, |
|
"step": 509000 |
|
}, |
|
{ |
|
"epoch": 2.6208173658084926, |
|
"grad_norm": 2.1207945346832275, |
|
"learning_rate": 4.95649494949495e-05, |
|
"loss": 1.5445, |
|
"step": 509500 |
|
}, |
|
{ |
|
"epoch": 2.6233893161184128, |
|
"grad_norm": 2.447849750518799, |
|
"learning_rate": 4.9514444444444444e-05, |
|
"loss": 1.5445, |
|
"step": 510000 |
|
}, |
|
{ |
|
"epoch": 2.6259612664283325, |
|
"grad_norm": 2.1976988315582275, |
|
"learning_rate": 4.94639393939394e-05, |
|
"loss": 1.5427, |
|
"step": 510500 |
|
}, |
|
{ |
|
"epoch": 2.6285332167382527, |
|
"grad_norm": 3.0653698444366455, |
|
"learning_rate": 4.9413535353535356e-05, |
|
"loss": 1.5496, |
|
"step": 511000 |
|
}, |
|
{ |
|
"epoch": 2.6311051670481724, |
|
"grad_norm": 2.4654083251953125, |
|
"learning_rate": 4.93630303030303e-05, |
|
"loss": 1.5482, |
|
"step": 511500 |
|
}, |
|
{ |
|
"epoch": 2.6336771173580926, |
|
"grad_norm": 2.5089690685272217, |
|
"learning_rate": 4.9312525252525256e-05, |
|
"loss": 1.5525, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 2.636249067668013, |
|
"grad_norm": 2.4755592346191406, |
|
"learning_rate": 4.92620202020202e-05, |
|
"loss": 1.5527, |
|
"step": 512500 |
|
}, |
|
{ |
|
"epoch": 2.6388210179779326, |
|
"grad_norm": 2.8626086711883545, |
|
"learning_rate": 4.9211515151515155e-05, |
|
"loss": 1.5388, |
|
"step": 513000 |
|
}, |
|
{ |
|
"epoch": 2.6413929682878527, |
|
"grad_norm": 2.2445054054260254, |
|
"learning_rate": 4.91610101010101e-05, |
|
"loss": 1.5513, |
|
"step": 513500 |
|
}, |
|
{ |
|
"epoch": 2.6439649185977725, |
|
"grad_norm": 2.358511447906494, |
|
"learning_rate": 4.911050505050505e-05, |
|
"loss": 1.5538, |
|
"step": 514000 |
|
}, |
|
{ |
|
"epoch": 2.6465368689076927, |
|
"grad_norm": 2.549711227416992, |
|
"learning_rate": 4.906e-05, |
|
"loss": 1.5489, |
|
"step": 514500 |
|
}, |
|
{ |
|
"epoch": 2.649108819217613, |
|
"grad_norm": 2.0755455493927, |
|
"learning_rate": 4.900949494949495e-05, |
|
"loss": 1.5371, |
|
"step": 515000 |
|
}, |
|
{ |
|
"epoch": 2.6516807695275326, |
|
"grad_norm": 2.5039193630218506, |
|
"learning_rate": 4.895909090909091e-05, |
|
"loss": 1.5404, |
|
"step": 515500 |
|
}, |
|
{ |
|
"epoch": 2.654252719837453, |
|
"grad_norm": 2.759974241256714, |
|
"learning_rate": 4.890858585858586e-05, |
|
"loss": 1.5441, |
|
"step": 516000 |
|
}, |
|
{ |
|
"epoch": 2.6568246701473726, |
|
"grad_norm": 1.9532139301300049, |
|
"learning_rate": 4.8858080808080807e-05, |
|
"loss": 1.5497, |
|
"step": 516500 |
|
}, |
|
{ |
|
"epoch": 2.6593966204572927, |
|
"grad_norm": 3.1684305667877197, |
|
"learning_rate": 4.880757575757576e-05, |
|
"loss": 1.5516, |
|
"step": 517000 |
|
}, |
|
{ |
|
"epoch": 2.661968570767213, |
|
"grad_norm": 2.467054843902588, |
|
"learning_rate": 4.8757070707070706e-05, |
|
"loss": 1.538, |
|
"step": 517500 |
|
}, |
|
{ |
|
"epoch": 2.6645405210771327, |
|
"grad_norm": 2.3552815914154053, |
|
"learning_rate": 4.870656565656566e-05, |
|
"loss": 1.5521, |
|
"step": 518000 |
|
}, |
|
{ |
|
"epoch": 2.667112471387053, |
|
"grad_norm": 2.5004801750183105, |
|
"learning_rate": 4.865606060606061e-05, |
|
"loss": 1.5419, |
|
"step": 518500 |
|
}, |
|
{ |
|
"epoch": 2.6696844216969726, |
|
"grad_norm": 2.8119254112243652, |
|
"learning_rate": 4.8605656565656565e-05, |
|
"loss": 1.5504, |
|
"step": 519000 |
|
}, |
|
{ |
|
"epoch": 2.672256372006893, |
|
"grad_norm": 2.6918792724609375, |
|
"learning_rate": 4.855515151515152e-05, |
|
"loss": 1.5384, |
|
"step": 519500 |
|
}, |
|
{ |
|
"epoch": 2.674828322316813, |
|
"grad_norm": 2.4995195865631104, |
|
"learning_rate": 4.850464646464647e-05, |
|
"loss": 1.5441, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 2.6774002726267327, |
|
"grad_norm": 2.166651964187622, |
|
"learning_rate": 4.845414141414142e-05, |
|
"loss": 1.5404, |
|
"step": 520500 |
|
}, |
|
{ |
|
"epoch": 2.679972222936653, |
|
"grad_norm": 2.4418795108795166, |
|
"learning_rate": 4.840373737373737e-05, |
|
"loss": 1.527, |
|
"step": 521000 |
|
}, |
|
{ |
|
"epoch": 2.6825441732465727, |
|
"grad_norm": 2.3248789310455322, |
|
"learning_rate": 4.835323232323233e-05, |
|
"loss": 1.5377, |
|
"step": 521500 |
|
}, |
|
{ |
|
"epoch": 2.685116123556493, |
|
"grad_norm": 2.5221030712127686, |
|
"learning_rate": 4.830272727272728e-05, |
|
"loss": 1.5421, |
|
"step": 522000 |
|
}, |
|
{ |
|
"epoch": 2.687688073866413, |
|
"grad_norm": 2.7731223106384277, |
|
"learning_rate": 4.825222222222222e-05, |
|
"loss": 1.5382, |
|
"step": 522500 |
|
}, |
|
{ |
|
"epoch": 2.690260024176333, |
|
"grad_norm": 2.157928943634033, |
|
"learning_rate": 4.820181818181819e-05, |
|
"loss": 1.5402, |
|
"step": 523000 |
|
}, |
|
{ |
|
"epoch": 2.692831974486253, |
|
"grad_norm": 2.3403429985046387, |
|
"learning_rate": 4.8151313131313136e-05, |
|
"loss": 1.5323, |
|
"step": 523500 |
|
}, |
|
{ |
|
"epoch": 2.6954039247961727, |
|
"grad_norm": 2.8037800788879395, |
|
"learning_rate": 4.810080808080808e-05, |
|
"loss": 1.5366, |
|
"step": 524000 |
|
}, |
|
{ |
|
"epoch": 2.697975875106093, |
|
"grad_norm": 2.8812320232391357, |
|
"learning_rate": 4.8050303030303035e-05, |
|
"loss": 1.5489, |
|
"step": 524500 |
|
}, |
|
{ |
|
"epoch": 2.700547825416013, |
|
"grad_norm": 2.4520397186279297, |
|
"learning_rate": 4.799979797979798e-05, |
|
"loss": 1.5409, |
|
"step": 525000 |
|
}, |
|
{ |
|
"epoch": 2.703119775725933, |
|
"grad_norm": 2.239299774169922, |
|
"learning_rate": 4.7949292929292935e-05, |
|
"loss": 1.549, |
|
"step": 525500 |
|
}, |
|
{ |
|
"epoch": 2.705691726035853, |
|
"grad_norm": 2.172064781188965, |
|
"learning_rate": 4.7898888888888894e-05, |
|
"loss": 1.5365, |
|
"step": 526000 |
|
}, |
|
{ |
|
"epoch": 2.708263676345773, |
|
"grad_norm": 2.851077079772949, |
|
"learning_rate": 4.784848484848485e-05, |
|
"loss": 1.5329, |
|
"step": 526500 |
|
}, |
|
{ |
|
"epoch": 2.710835626655693, |
|
"grad_norm": 2.423591136932373, |
|
"learning_rate": 4.77979797979798e-05, |
|
"loss": 1.5382, |
|
"step": 527000 |
|
}, |
|
{ |
|
"epoch": 2.713407576965613, |
|
"grad_norm": 2.675351858139038, |
|
"learning_rate": 4.774747474747475e-05, |
|
"loss": 1.548, |
|
"step": 527500 |
|
}, |
|
{ |
|
"epoch": 2.715979527275533, |
|
"grad_norm": 2.165239095687866, |
|
"learning_rate": 4.76969696969697e-05, |
|
"loss": 1.5423, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 2.718551477585453, |
|
"grad_norm": 2.6030383110046387, |
|
"learning_rate": 4.764646464646465e-05, |
|
"loss": 1.5309, |
|
"step": 528500 |
|
}, |
|
{ |
|
"epoch": 2.721123427895373, |
|
"grad_norm": 2.359309196472168, |
|
"learning_rate": 4.75959595959596e-05, |
|
"loss": 1.5286, |
|
"step": 529000 |
|
}, |
|
{ |
|
"epoch": 2.723695378205293, |
|
"grad_norm": 2.1645898818969727, |
|
"learning_rate": 4.7545454545454545e-05, |
|
"loss": 1.5376, |
|
"step": 529500 |
|
}, |
|
{ |
|
"epoch": 2.7262673285152133, |
|
"grad_norm": 2.3792974948883057, |
|
"learning_rate": 4.74949494949495e-05, |
|
"loss": 1.5367, |
|
"step": 530000 |
|
}, |
|
{ |
|
"epoch": 2.728839278825133, |
|
"grad_norm": 2.7375681400299072, |
|
"learning_rate": 4.7444444444444445e-05, |
|
"loss": 1.5249, |
|
"step": 530500 |
|
}, |
|
{ |
|
"epoch": 2.731411229135053, |
|
"grad_norm": 2.417910099029541, |
|
"learning_rate": 4.7394040404040405e-05, |
|
"loss": 1.534, |
|
"step": 531000 |
|
}, |
|
{ |
|
"epoch": 2.733983179444973, |
|
"grad_norm": 2.386302947998047, |
|
"learning_rate": 4.734363636363637e-05, |
|
"loss": 1.538, |
|
"step": 531500 |
|
}, |
|
{ |
|
"epoch": 2.736555129754893, |
|
"grad_norm": 2.2796523571014404, |
|
"learning_rate": 4.729313131313132e-05, |
|
"loss": 1.5281, |
|
"step": 532000 |
|
}, |
|
{ |
|
"epoch": 2.7391270800648133, |
|
"grad_norm": 2.4717445373535156, |
|
"learning_rate": 4.7242626262626264e-05, |
|
"loss": 1.5375, |
|
"step": 532500 |
|
}, |
|
{ |
|
"epoch": 2.741699030374733, |
|
"grad_norm": 2.348935842514038, |
|
"learning_rate": 4.719212121212122e-05, |
|
"loss": 1.5303, |
|
"step": 533000 |
|
}, |
|
{ |
|
"epoch": 2.7442709806846532, |
|
"grad_norm": 2.4359893798828125, |
|
"learning_rate": 4.714161616161616e-05, |
|
"loss": 1.5418, |
|
"step": 533500 |
|
}, |
|
{ |
|
"epoch": 2.746842930994573, |
|
"grad_norm": 3.118255853652954, |
|
"learning_rate": 4.7091111111111116e-05, |
|
"loss": 1.538, |
|
"step": 534000 |
|
}, |
|
{ |
|
"epoch": 2.749414881304493, |
|
"grad_norm": 2.450284004211426, |
|
"learning_rate": 4.704060606060606e-05, |
|
"loss": 1.5321, |
|
"step": 534500 |
|
}, |
|
{ |
|
"epoch": 2.7519868316144134, |
|
"grad_norm": 2.3103396892547607, |
|
"learning_rate": 4.699010101010101e-05, |
|
"loss": 1.5388, |
|
"step": 535000 |
|
}, |
|
{ |
|
"epoch": 2.754558781924333, |
|
"grad_norm": 2.439276695251465, |
|
"learning_rate": 4.693959595959596e-05, |
|
"loss": 1.5228, |
|
"step": 535500 |
|
}, |
|
{ |
|
"epoch": 2.7571307322342533, |
|
"grad_norm": 2.310704231262207, |
|
"learning_rate": 4.688909090909091e-05, |
|
"loss": 1.52, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 2.759702682544173, |
|
"grad_norm": 3.0740084648132324, |
|
"learning_rate": 4.683868686868687e-05, |
|
"loss": 1.5354, |
|
"step": 536500 |
|
}, |
|
{ |
|
"epoch": 2.7622746328540932, |
|
"grad_norm": 2.635913848876953, |
|
"learning_rate": 4.678818181818182e-05, |
|
"loss": 1.5301, |
|
"step": 537000 |
|
}, |
|
{ |
|
"epoch": 2.7648465831640134, |
|
"grad_norm": 2.3458645343780518, |
|
"learning_rate": 4.673767676767677e-05, |
|
"loss": 1.5213, |
|
"step": 537500 |
|
}, |
|
{ |
|
"epoch": 2.767418533473933, |
|
"grad_norm": 2.191563367843628, |
|
"learning_rate": 4.668717171717172e-05, |
|
"loss": 1.5309, |
|
"step": 538000 |
|
}, |
|
{ |
|
"epoch": 2.7699904837838534, |
|
"grad_norm": 2.256751537322998, |
|
"learning_rate": 4.663666666666667e-05, |
|
"loss": 1.5254, |
|
"step": 538500 |
|
}, |
|
{ |
|
"epoch": 2.772562434093773, |
|
"grad_norm": 2.0021133422851562, |
|
"learning_rate": 4.658616161616162e-05, |
|
"loss": 1.5261, |
|
"step": 539000 |
|
}, |
|
{ |
|
"epoch": 2.7751343844036933, |
|
"grad_norm": 2.282194137573242, |
|
"learning_rate": 4.6535656565656566e-05, |
|
"loss": 1.5275, |
|
"step": 539500 |
|
}, |
|
{ |
|
"epoch": 2.7777063347136135, |
|
"grad_norm": 2.4739699363708496, |
|
"learning_rate": 4.6485252525252526e-05, |
|
"loss": 1.5292, |
|
"step": 540000 |
|
}, |
|
{ |
|
"epoch": 2.7802782850235332, |
|
"grad_norm": 2.498216390609741, |
|
"learning_rate": 4.643474747474747e-05, |
|
"loss": 1.5248, |
|
"step": 540500 |
|
}, |
|
{ |
|
"epoch": 2.7828502353334534, |
|
"grad_norm": 2.388746738433838, |
|
"learning_rate": 4.6384242424242425e-05, |
|
"loss": 1.5217, |
|
"step": 541000 |
|
}, |
|
{ |
|
"epoch": 2.785422185643373, |
|
"grad_norm": 2.673908233642578, |
|
"learning_rate": 4.633373737373737e-05, |
|
"loss": 1.5309, |
|
"step": 541500 |
|
}, |
|
{ |
|
"epoch": 2.7879941359532934, |
|
"grad_norm": 2.3223876953125, |
|
"learning_rate": 4.628333333333333e-05, |
|
"loss": 1.5313, |
|
"step": 542000 |
|
}, |
|
{ |
|
"epoch": 2.7905660862632136, |
|
"grad_norm": 2.03485369682312, |
|
"learning_rate": 4.62329292929293e-05, |
|
"loss": 1.5357, |
|
"step": 542500 |
|
}, |
|
{ |
|
"epoch": 2.7931380365731333, |
|
"grad_norm": 2.342752456665039, |
|
"learning_rate": 4.6182424242424244e-05, |
|
"loss": 1.5327, |
|
"step": 543000 |
|
}, |
|
{ |
|
"epoch": 2.7957099868830535, |
|
"grad_norm": 2.879817008972168, |
|
"learning_rate": 4.613191919191919e-05, |
|
"loss": 1.5384, |
|
"step": 543500 |
|
}, |
|
{ |
|
"epoch": 2.7982819371929732, |
|
"grad_norm": 2.0930681228637695, |
|
"learning_rate": 4.608141414141414e-05, |
|
"loss": 1.5246, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 2.8008538875028934, |
|
"grad_norm": 2.341869592666626, |
|
"learning_rate": 4.603090909090909e-05, |
|
"loss": 1.5255, |
|
"step": 544500 |
|
}, |
|
{ |
|
"epoch": 2.8034258378128136, |
|
"grad_norm": 2.309088945388794, |
|
"learning_rate": 4.598040404040404e-05, |
|
"loss": 1.5201, |
|
"step": 545000 |
|
}, |
|
{ |
|
"epoch": 2.8059977881227334, |
|
"grad_norm": 2.4833176136016846, |
|
"learning_rate": 4.592989898989899e-05, |
|
"loss": 1.5327, |
|
"step": 545500 |
|
}, |
|
{ |
|
"epoch": 2.8085697384326536, |
|
"grad_norm": 2.2396302223205566, |
|
"learning_rate": 4.587939393939394e-05, |
|
"loss": 1.52, |
|
"step": 546000 |
|
}, |
|
{ |
|
"epoch": 2.8111416887425733, |
|
"grad_norm": 2.740811586380005, |
|
"learning_rate": 4.5828888888888895e-05, |
|
"loss": 1.5327, |
|
"step": 546500 |
|
}, |
|
{ |
|
"epoch": 2.8137136390524935, |
|
"grad_norm": 3.175210952758789, |
|
"learning_rate": 4.577848484848485e-05, |
|
"loss": 1.529, |
|
"step": 547000 |
|
}, |
|
{ |
|
"epoch": 2.8162855893624137, |
|
"grad_norm": 2.597053050994873, |
|
"learning_rate": 4.5727979797979795e-05, |
|
"loss": 1.5238, |
|
"step": 547500 |
|
}, |
|
{ |
|
"epoch": 2.8188575396723334, |
|
"grad_norm": 2.355821132659912, |
|
"learning_rate": 4.5677474747474754e-05, |
|
"loss": 1.53, |
|
"step": 548000 |
|
}, |
|
{ |
|
"epoch": 2.8214294899822536, |
|
"grad_norm": 2.92700457572937, |
|
"learning_rate": 4.56269696969697e-05, |
|
"loss": 1.5212, |
|
"step": 548500 |
|
}, |
|
{ |
|
"epoch": 2.8240014402921734, |
|
"grad_norm": 2.831411838531494, |
|
"learning_rate": 4.557646464646465e-05, |
|
"loss": 1.527, |
|
"step": 549000 |
|
}, |
|
{ |
|
"epoch": 2.8265733906020936, |
|
"grad_norm": 2.444070816040039, |
|
"learning_rate": 4.5526060606060614e-05, |
|
"loss": 1.535, |
|
"step": 549500 |
|
}, |
|
{ |
|
"epoch": 2.8291453409120138, |
|
"grad_norm": 2.4589648246765137, |
|
"learning_rate": 4.547555555555556e-05, |
|
"loss": 1.5284, |
|
"step": 550000 |
|
}, |
|
{ |
|
"epoch": 2.8317172912219335, |
|
"grad_norm": 2.601458787918091, |
|
"learning_rate": 4.5425050505050506e-05, |
|
"loss": 1.5281, |
|
"step": 550500 |
|
}, |
|
{ |
|
"epoch": 2.8342892415318537, |
|
"grad_norm": 2.6681647300720215, |
|
"learning_rate": 4.537454545454546e-05, |
|
"loss": 1.5211, |
|
"step": 551000 |
|
}, |
|
{ |
|
"epoch": 2.8368611918417734, |
|
"grad_norm": 2.3051042556762695, |
|
"learning_rate": 4.5324040404040406e-05, |
|
"loss": 1.5246, |
|
"step": 551500 |
|
}, |
|
{ |
|
"epoch": 2.8394331421516936, |
|
"grad_norm": 3.2226884365081787, |
|
"learning_rate": 4.527353535353536e-05, |
|
"loss": 1.5223, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 2.842005092461614, |
|
"grad_norm": 2.27409291267395, |
|
"learning_rate": 4.5223030303030305e-05, |
|
"loss": 1.5167, |
|
"step": 552500 |
|
}, |
|
{ |
|
"epoch": 2.8445770427715336, |
|
"grad_norm": 2.736320734024048, |
|
"learning_rate": 4.517272727272727e-05, |
|
"loss": 1.5226, |
|
"step": 553000 |
|
}, |
|
{ |
|
"epoch": 2.8471489930814537, |
|
"grad_norm": 2.539435386657715, |
|
"learning_rate": 4.5122222222222224e-05, |
|
"loss": 1.521, |
|
"step": 553500 |
|
}, |
|
{ |
|
"epoch": 2.8497209433913735, |
|
"grad_norm": 2.52431321144104, |
|
"learning_rate": 4.507171717171718e-05, |
|
"loss": 1.5321, |
|
"step": 554000 |
|
}, |
|
{ |
|
"epoch": 2.8522928937012937, |
|
"grad_norm": 2.110541343688965, |
|
"learning_rate": 4.5021212121212124e-05, |
|
"loss": 1.5233, |
|
"step": 554500 |
|
}, |
|
{ |
|
"epoch": 2.854864844011214, |
|
"grad_norm": 2.501573085784912, |
|
"learning_rate": 4.497070707070708e-05, |
|
"loss": 1.5267, |
|
"step": 555000 |
|
}, |
|
{ |
|
"epoch": 2.8574367943211336, |
|
"grad_norm": 2.4063198566436768, |
|
"learning_rate": 4.4920303030303036e-05, |
|
"loss": 1.5171, |
|
"step": 555500 |
|
}, |
|
{ |
|
"epoch": 2.860008744631054, |
|
"grad_norm": 3.3333494663238525, |
|
"learning_rate": 4.486979797979798e-05, |
|
"loss": 1.5044, |
|
"step": 556000 |
|
}, |
|
{ |
|
"epoch": 2.8625806949409736, |
|
"grad_norm": 2.509376049041748, |
|
"learning_rate": 4.4819292929292936e-05, |
|
"loss": 1.5242, |
|
"step": 556500 |
|
}, |
|
{ |
|
"epoch": 2.8651526452508937, |
|
"grad_norm": 2.505197048187256, |
|
"learning_rate": 4.476878787878788e-05, |
|
"loss": 1.5293, |
|
"step": 557000 |
|
}, |
|
{ |
|
"epoch": 2.867724595560814, |
|
"grad_norm": 2.4513468742370605, |
|
"learning_rate": 4.471828282828283e-05, |
|
"loss": 1.5188, |
|
"step": 557500 |
|
}, |
|
{ |
|
"epoch": 2.8702965458707337, |
|
"grad_norm": 2.7993083000183105, |
|
"learning_rate": 4.466777777777778e-05, |
|
"loss": 1.5223, |
|
"step": 558000 |
|
}, |
|
{ |
|
"epoch": 2.872868496180654, |
|
"grad_norm": 2.5785391330718994, |
|
"learning_rate": 4.461727272727273e-05, |
|
"loss": 1.5273, |
|
"step": 558500 |
|
}, |
|
{ |
|
"epoch": 2.8754404464905736, |
|
"grad_norm": 2.3784685134887695, |
|
"learning_rate": 4.456676767676768e-05, |
|
"loss": 1.5144, |
|
"step": 559000 |
|
}, |
|
{ |
|
"epoch": 2.878012396800494, |
|
"grad_norm": 2.3746955394744873, |
|
"learning_rate": 4.451636363636364e-05, |
|
"loss": 1.505, |
|
"step": 559500 |
|
}, |
|
{ |
|
"epoch": 2.880584347110414, |
|
"grad_norm": 2.898721218109131, |
|
"learning_rate": 4.446585858585859e-05, |
|
"loss": 1.5281, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 2.8831562974203337, |
|
"grad_norm": 2.6563735008239746, |
|
"learning_rate": 4.441535353535354e-05, |
|
"loss": 1.5211, |
|
"step": 560500 |
|
}, |
|
{ |
|
"epoch": 2.885728247730254, |
|
"grad_norm": 2.060058832168579, |
|
"learning_rate": 4.4364848484848487e-05, |
|
"loss": 1.5165, |
|
"step": 561000 |
|
}, |
|
{ |
|
"epoch": 2.8883001980401737, |
|
"grad_norm": 2.6480188369750977, |
|
"learning_rate": 4.431434343434343e-05, |
|
"loss": 1.5312, |
|
"step": 561500 |
|
}, |
|
{ |
|
"epoch": 2.890872148350094, |
|
"grad_norm": 2.5112969875335693, |
|
"learning_rate": 4.4263838383838386e-05, |
|
"loss": 1.5057, |
|
"step": 562000 |
|
}, |
|
{ |
|
"epoch": 2.893444098660014, |
|
"grad_norm": 2.4975204467773438, |
|
"learning_rate": 4.421333333333333e-05, |
|
"loss": 1.5081, |
|
"step": 562500 |
|
}, |
|
{ |
|
"epoch": 2.896016048969934, |
|
"grad_norm": 2.5974085330963135, |
|
"learning_rate": 4.4162828282828286e-05, |
|
"loss": 1.5309, |
|
"step": 563000 |
|
}, |
|
{ |
|
"epoch": 2.898587999279854, |
|
"grad_norm": 2.727055788040161, |
|
"learning_rate": 4.411232323232323e-05, |
|
"loss": 1.5145, |
|
"step": 563500 |
|
}, |
|
{ |
|
"epoch": 2.9011599495897737, |
|
"grad_norm": 2.283076763153076, |
|
"learning_rate": 4.406191919191919e-05, |
|
"loss": 1.5187, |
|
"step": 564000 |
|
}, |
|
{ |
|
"epoch": 2.903731899899694, |
|
"grad_norm": 2.706749439239502, |
|
"learning_rate": 4.4011414141414145e-05, |
|
"loss": 1.5075, |
|
"step": 564500 |
|
}, |
|
{ |
|
"epoch": 2.906303850209614, |
|
"grad_norm": 2.3458900451660156, |
|
"learning_rate": 4.396090909090909e-05, |
|
"loss": 1.5225, |
|
"step": 565000 |
|
}, |
|
{ |
|
"epoch": 2.908875800519534, |
|
"grad_norm": 2.2899625301361084, |
|
"learning_rate": 4.3910404040404044e-05, |
|
"loss": 1.5058, |
|
"step": 565500 |
|
}, |
|
{ |
|
"epoch": 2.911447750829454, |
|
"grad_norm": 2.800731658935547, |
|
"learning_rate": 4.385989898989899e-05, |
|
"loss": 1.5203, |
|
"step": 566000 |
|
}, |
|
{ |
|
"epoch": 2.914019701139374, |
|
"grad_norm": 2.9070866107940674, |
|
"learning_rate": 4.380949494949495e-05, |
|
"loss": 1.5254, |
|
"step": 566500 |
|
}, |
|
{ |
|
"epoch": 2.916591651449294, |
|
"grad_norm": 2.3995327949523926, |
|
"learning_rate": 4.3758989898989896e-05, |
|
"loss": 1.5264, |
|
"step": 567000 |
|
}, |
|
{ |
|
"epoch": 2.919163601759214, |
|
"grad_norm": 3.0303332805633545, |
|
"learning_rate": 4.370848484848485e-05, |
|
"loss": 1.5199, |
|
"step": 567500 |
|
}, |
|
{ |
|
"epoch": 2.921735552069134, |
|
"grad_norm": 2.392720937728882, |
|
"learning_rate": 4.3657979797979796e-05, |
|
"loss": 1.5069, |
|
"step": 568000 |
|
}, |
|
{ |
|
"epoch": 2.924307502379054, |
|
"grad_norm": 2.5260987281799316, |
|
"learning_rate": 4.3607575757575755e-05, |
|
"loss": 1.5239, |
|
"step": 568500 |
|
}, |
|
{ |
|
"epoch": 2.926879452688974, |
|
"grad_norm": 1.9965590238571167, |
|
"learning_rate": 4.355707070707071e-05, |
|
"loss": 1.5112, |
|
"step": 569000 |
|
}, |
|
{ |
|
"epoch": 2.929451402998894, |
|
"grad_norm": 2.7305872440338135, |
|
"learning_rate": 4.3506565656565655e-05, |
|
"loss": 1.5222, |
|
"step": 569500 |
|
}, |
|
{ |
|
"epoch": 2.9320233533088143, |
|
"grad_norm": 2.196129083633423, |
|
"learning_rate": 4.345606060606061e-05, |
|
"loss": 1.5237, |
|
"step": 570000 |
|
}, |
|
{ |
|
"epoch": 2.934595303618734, |
|
"grad_norm": 2.489001750946045, |
|
"learning_rate": 4.3405555555555554e-05, |
|
"loss": 1.5122, |
|
"step": 570500 |
|
}, |
|
{ |
|
"epoch": 2.937167253928654, |
|
"grad_norm": 2.8367908000946045, |
|
"learning_rate": 4.335505050505051e-05, |
|
"loss": 1.5113, |
|
"step": 571000 |
|
}, |
|
{ |
|
"epoch": 2.939739204238574, |
|
"grad_norm": 2.413041114807129, |
|
"learning_rate": 4.330454545454546e-05, |
|
"loss": 1.516, |
|
"step": 571500 |
|
}, |
|
{ |
|
"epoch": 2.942311154548494, |
|
"grad_norm": 2.2877037525177, |
|
"learning_rate": 4.325404040404041e-05, |
|
"loss": 1.5172, |
|
"step": 572000 |
|
}, |
|
{ |
|
"epoch": 2.9448831048584143, |
|
"grad_norm": 2.668660879135132, |
|
"learning_rate": 4.3203636363636366e-05, |
|
"loss": 1.5107, |
|
"step": 572500 |
|
}, |
|
{ |
|
"epoch": 2.947455055168334, |
|
"grad_norm": 3.0024032592773438, |
|
"learning_rate": 4.315313131313132e-05, |
|
"loss": 1.5144, |
|
"step": 573000 |
|
}, |
|
{ |
|
"epoch": 2.9500270054782542, |
|
"grad_norm": 2.159865617752075, |
|
"learning_rate": 4.3102626262626266e-05, |
|
"loss": 1.5036, |
|
"step": 573500 |
|
}, |
|
{ |
|
"epoch": 2.952598955788174, |
|
"grad_norm": 2.5722429752349854, |
|
"learning_rate": 4.305212121212122e-05, |
|
"loss": 1.5255, |
|
"step": 574000 |
|
}, |
|
{ |
|
"epoch": 2.955170906098094, |
|
"grad_norm": 2.755248546600342, |
|
"learning_rate": 4.300171717171718e-05, |
|
"loss": 1.5051, |
|
"step": 574500 |
|
}, |
|
{ |
|
"epoch": 2.9577428564080144, |
|
"grad_norm": 2.2805163860321045, |
|
"learning_rate": 4.2951212121212125e-05, |
|
"loss": 1.5221, |
|
"step": 575000 |
|
}, |
|
{ |
|
"epoch": 2.960314806717934, |
|
"grad_norm": 3.676866292953491, |
|
"learning_rate": 4.290070707070707e-05, |
|
"loss": 1.5145, |
|
"step": 575500 |
|
}, |
|
{ |
|
"epoch": 2.9628867570278543, |
|
"grad_norm": 2.105748414993286, |
|
"learning_rate": 4.2850202020202024e-05, |
|
"loss": 1.5063, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 2.965458707337774, |
|
"grad_norm": 2.225126266479492, |
|
"learning_rate": 4.279969696969697e-05, |
|
"loss": 1.5101, |
|
"step": 576500 |
|
}, |
|
{ |
|
"epoch": 2.9680306576476942, |
|
"grad_norm": 2.7732033729553223, |
|
"learning_rate": 4.274929292929293e-05, |
|
"loss": 1.5072, |
|
"step": 577000 |
|
}, |
|
{ |
|
"epoch": 2.9706026079576144, |
|
"grad_norm": 2.483477830886841, |
|
"learning_rate": 4.2698787878787883e-05, |
|
"loss": 1.5167, |
|
"step": 577500 |
|
}, |
|
{ |
|
"epoch": 2.973174558267534, |
|
"grad_norm": 2.6519720554351807, |
|
"learning_rate": 4.264828282828283e-05, |
|
"loss": 1.5196, |
|
"step": 578000 |
|
}, |
|
{ |
|
"epoch": 2.9757465085774544, |
|
"grad_norm": 2.3944153785705566, |
|
"learning_rate": 4.259777777777778e-05, |
|
"loss": 1.5184, |
|
"step": 578500 |
|
}, |
|
{ |
|
"epoch": 2.978318458887374, |
|
"grad_norm": 2.850205183029175, |
|
"learning_rate": 4.254727272727273e-05, |
|
"loss": 1.5124, |
|
"step": 579000 |
|
}, |
|
{ |
|
"epoch": 2.9808904091972943, |
|
"grad_norm": 1.9868264198303223, |
|
"learning_rate": 4.249676767676768e-05, |
|
"loss": 1.5045, |
|
"step": 579500 |
|
}, |
|
{ |
|
"epoch": 2.9834623595072145, |
|
"grad_norm": 2.709223985671997, |
|
"learning_rate": 4.244636363636364e-05, |
|
"loss": 1.512, |
|
"step": 580000 |
|
}, |
|
{ |
|
"epoch": 2.9860343098171342, |
|
"grad_norm": 2.369521141052246, |
|
"learning_rate": 4.239585858585859e-05, |
|
"loss": 1.5121, |
|
"step": 580500 |
|
}, |
|
{ |
|
"epoch": 2.9886062601270544, |
|
"grad_norm": 2.712256669998169, |
|
"learning_rate": 4.2345353535353535e-05, |
|
"loss": 1.5157, |
|
"step": 581000 |
|
}, |
|
{ |
|
"epoch": 2.991178210436974, |
|
"grad_norm": 2.4199235439300537, |
|
"learning_rate": 4.229484848484849e-05, |
|
"loss": 1.5116, |
|
"step": 581500 |
|
}, |
|
{ |
|
"epoch": 2.9937501607468944, |
|
"grad_norm": 2.4358603954315186, |
|
"learning_rate": 4.2244343434343434e-05, |
|
"loss": 1.5268, |
|
"step": 582000 |
|
}, |
|
{ |
|
"epoch": 2.9963221110568146, |
|
"grad_norm": 2.8168931007385254, |
|
"learning_rate": 4.2193939393939394e-05, |
|
"loss": 1.5058, |
|
"step": 582500 |
|
}, |
|
{ |
|
"epoch": 2.9988940613667343, |
|
"grad_norm": 2.282642364501953, |
|
"learning_rate": 4.214343434343435e-05, |
|
"loss": 1.4964, |
|
"step": 583000 |
|
}, |
|
{ |
|
"epoch": 3.0014660116766545, |
|
"grad_norm": 2.6705520153045654, |
|
"learning_rate": 4.209292929292929e-05, |
|
"loss": 1.5017, |
|
"step": 583500 |
|
}, |
|
{ |
|
"epoch": 3.0040379619865742, |
|
"grad_norm": 2.6078131198883057, |
|
"learning_rate": 4.2042424242424246e-05, |
|
"loss": 1.4998, |
|
"step": 584000 |
|
}, |
|
{ |
|
"epoch": 3.0066099122964944, |
|
"grad_norm": 2.8063297271728516, |
|
"learning_rate": 4.1992020202020206e-05, |
|
"loss": 1.497, |
|
"step": 584500 |
|
}, |
|
{ |
|
"epoch": 3.0091818626064146, |
|
"grad_norm": 2.291599750518799, |
|
"learning_rate": 4.194151515151515e-05, |
|
"loss": 1.5016, |
|
"step": 585000 |
|
}, |
|
{ |
|
"epoch": 3.0117538129163344, |
|
"grad_norm": 2.3349857330322266, |
|
"learning_rate": 4.1891010101010105e-05, |
|
"loss": 1.4991, |
|
"step": 585500 |
|
}, |
|
{ |
|
"epoch": 3.0143257632262546, |
|
"grad_norm": 2.4059336185455322, |
|
"learning_rate": 4.184050505050505e-05, |
|
"loss": 1.5076, |
|
"step": 586000 |
|
}, |
|
{ |
|
"epoch": 3.0168977135361743, |
|
"grad_norm": 1.973617672920227, |
|
"learning_rate": 4.179e-05, |
|
"loss": 1.497, |
|
"step": 586500 |
|
}, |
|
{ |
|
"epoch": 3.0194696638460945, |
|
"grad_norm": 2.9620471000671387, |
|
"learning_rate": 4.1739595959595964e-05, |
|
"loss": 1.5149, |
|
"step": 587000 |
|
}, |
|
{ |
|
"epoch": 3.0220416141560147, |
|
"grad_norm": 2.4589638710021973, |
|
"learning_rate": 4.168909090909091e-05, |
|
"loss": 1.5073, |
|
"step": 587500 |
|
}, |
|
{ |
|
"epoch": 3.0246135644659344, |
|
"grad_norm": 2.5346004962921143, |
|
"learning_rate": 4.163858585858586e-05, |
|
"loss": 1.5117, |
|
"step": 588000 |
|
}, |
|
{ |
|
"epoch": 3.0271855147758546, |
|
"grad_norm": 2.4980521202087402, |
|
"learning_rate": 4.158808080808081e-05, |
|
"loss": 1.5104, |
|
"step": 588500 |
|
}, |
|
{ |
|
"epoch": 3.0297574650857744, |
|
"grad_norm": 2.5343849658966064, |
|
"learning_rate": 4.1537575757575756e-05, |
|
"loss": 1.4911, |
|
"step": 589000 |
|
}, |
|
{ |
|
"epoch": 3.0323294153956946, |
|
"grad_norm": 2.3915090560913086, |
|
"learning_rate": 4.148707070707071e-05, |
|
"loss": 1.5047, |
|
"step": 589500 |
|
}, |
|
{ |
|
"epoch": 3.0349013657056148, |
|
"grad_norm": 2.2896182537078857, |
|
"learning_rate": 4.1436565656565656e-05, |
|
"loss": 1.4912, |
|
"step": 590000 |
|
}, |
|
{ |
|
"epoch": 3.0374733160155345, |
|
"grad_norm": 2.66957426071167, |
|
"learning_rate": 4.1386161616161616e-05, |
|
"loss": 1.5044, |
|
"step": 590500 |
|
}, |
|
{ |
|
"epoch": 3.0400452663254547, |
|
"grad_norm": 2.3858649730682373, |
|
"learning_rate": 4.133565656565657e-05, |
|
"loss": 1.5103, |
|
"step": 591000 |
|
}, |
|
{ |
|
"epoch": 3.0426172166353744, |
|
"grad_norm": 2.154978036880493, |
|
"learning_rate": 4.128525252525253e-05, |
|
"loss": 1.5072, |
|
"step": 591500 |
|
}, |
|
{ |
|
"epoch": 3.0451891669452946, |
|
"grad_norm": 2.9559261798858643, |
|
"learning_rate": 4.1234747474747475e-05, |
|
"loss": 1.5125, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 3.047761117255215, |
|
"grad_norm": 2.4529426097869873, |
|
"learning_rate": 4.118424242424243e-05, |
|
"loss": 1.4832, |
|
"step": 592500 |
|
}, |
|
{ |
|
"epoch": 3.0503330675651346, |
|
"grad_norm": 2.664656162261963, |
|
"learning_rate": 4.1133737373737374e-05, |
|
"loss": 1.4932, |
|
"step": 593000 |
|
}, |
|
{ |
|
"epoch": 3.0529050178750547, |
|
"grad_norm": 2.5239176750183105, |
|
"learning_rate": 4.108323232323232e-05, |
|
"loss": 1.4963, |
|
"step": 593500 |
|
}, |
|
{ |
|
"epoch": 3.0554769681849745, |
|
"grad_norm": 2.7687795162200928, |
|
"learning_rate": 4.1032727272727274e-05, |
|
"loss": 1.5024, |
|
"step": 594000 |
|
}, |
|
{ |
|
"epoch": 3.0580489184948947, |
|
"grad_norm": 2.636725425720215, |
|
"learning_rate": 4.098222222222222e-05, |
|
"loss": 1.5095, |
|
"step": 594500 |
|
}, |
|
{ |
|
"epoch": 3.060620868804815, |
|
"grad_norm": 2.643148899078369, |
|
"learning_rate": 4.093171717171717e-05, |
|
"loss": 1.5023, |
|
"step": 595000 |
|
}, |
|
{ |
|
"epoch": 3.0631928191147346, |
|
"grad_norm": 2.728957176208496, |
|
"learning_rate": 4.0881212121212126e-05, |
|
"loss": 1.5057, |
|
"step": 595500 |
|
}, |
|
{ |
|
"epoch": 3.065764769424655, |
|
"grad_norm": 2.0928802490234375, |
|
"learning_rate": 4.083070707070707e-05, |
|
"loss": 1.5028, |
|
"step": 596000 |
|
}, |
|
{ |
|
"epoch": 3.0683367197345746, |
|
"grad_norm": 2.6500329971313477, |
|
"learning_rate": 4.078030303030303e-05, |
|
"loss": 1.5014, |
|
"step": 596500 |
|
}, |
|
{ |
|
"epoch": 3.0709086700444947, |
|
"grad_norm": 3.050570249557495, |
|
"learning_rate": 4.0729797979797985e-05, |
|
"loss": 1.4955, |
|
"step": 597000 |
|
}, |
|
{ |
|
"epoch": 3.073480620354415, |
|
"grad_norm": 2.7134509086608887, |
|
"learning_rate": 4.067929292929293e-05, |
|
"loss": 1.4975, |
|
"step": 597500 |
|
}, |
|
{ |
|
"epoch": 3.0760525706643347, |
|
"grad_norm": 2.270643711090088, |
|
"learning_rate": 4.0628787878787885e-05, |
|
"loss": 1.4995, |
|
"step": 598000 |
|
}, |
|
{ |
|
"epoch": 3.078624520974255, |
|
"grad_norm": 2.371786594390869, |
|
"learning_rate": 4.057828282828283e-05, |
|
"loss": 1.5082, |
|
"step": 598500 |
|
}, |
|
{ |
|
"epoch": 3.0811964712841746, |
|
"grad_norm": 2.286396026611328, |
|
"learning_rate": 4.052787878787879e-05, |
|
"loss": 1.5081, |
|
"step": 599000 |
|
}, |
|
{ |
|
"epoch": 3.083768421594095, |
|
"grad_norm": 2.2606699466705322, |
|
"learning_rate": 4.0477373737373744e-05, |
|
"loss": 1.4959, |
|
"step": 599500 |
|
}, |
|
{ |
|
"epoch": 3.086340371904015, |
|
"grad_norm": 2.225919008255005, |
|
"learning_rate": 4.0426969696969696e-05, |
|
"loss": 1.5053, |
|
"step": 600000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 1000000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 50000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.0117530008615338e+19, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|