Qwen3-32B-augmented-cf6a1999 / trainer_state.json
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 0,
"global_step": 1274,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0007849293563579278,
"grad_norm": 0.3427978456020355,
"learning_rate": 1e-05,
"loss": 2.1035,
"step": 1
},
{
"epoch": 0.0015698587127158557,
"grad_norm": 0.332590788602829,
"learning_rate": 9.992150706436422e-06,
"loss": 2.0738,
"step": 2
},
{
"epoch": 0.002354788069073783,
"grad_norm": 0.36298516392707825,
"learning_rate": 9.984301412872842e-06,
"loss": 2.0758,
"step": 3
},
{
"epoch": 0.0031397174254317113,
"grad_norm": 0.32631000876426697,
"learning_rate": 9.976452119309263e-06,
"loss": 1.9876,
"step": 4
},
{
"epoch": 0.003924646781789639,
"grad_norm": 0.36786606907844543,
"learning_rate": 9.968602825745683e-06,
"loss": 2.1213,
"step": 5
},
{
"epoch": 0.004709576138147566,
"grad_norm": 0.34687480330467224,
"learning_rate": 9.960753532182104e-06,
"loss": 2.0017,
"step": 6
},
{
"epoch": 0.005494505494505495,
"grad_norm": 0.3398713171482086,
"learning_rate": 9.952904238618524e-06,
"loss": 1.9805,
"step": 7
},
{
"epoch": 0.006279434850863423,
"grad_norm": 0.3185282051563263,
"learning_rate": 9.945054945054946e-06,
"loss": 1.8601,
"step": 8
},
{
"epoch": 0.00706436420722135,
"grad_norm": 0.372031033039093,
"learning_rate": 9.937205651491367e-06,
"loss": 2.1202,
"step": 9
},
{
"epoch": 0.007849293563579277,
"grad_norm": 0.3446281850337982,
"learning_rate": 9.929356357927787e-06,
"loss": 1.9856,
"step": 10
},
{
"epoch": 0.008634222919937205,
"grad_norm": 0.32924437522888184,
"learning_rate": 9.921507064364208e-06,
"loss": 1.8849,
"step": 11
},
{
"epoch": 0.009419152276295133,
"grad_norm": 0.33408552408218384,
"learning_rate": 9.91365777080063e-06,
"loss": 1.8825,
"step": 12
},
{
"epoch": 0.01020408163265306,
"grad_norm": 0.3477867841720581,
"learning_rate": 9.90580847723705e-06,
"loss": 1.9898,
"step": 13
},
{
"epoch": 0.01098901098901099,
"grad_norm": 0.3446689546108246,
"learning_rate": 9.89795918367347e-06,
"loss": 1.8995,
"step": 14
},
{
"epoch": 0.011773940345368918,
"grad_norm": 0.32618626952171326,
"learning_rate": 9.890109890109892e-06,
"loss": 1.8615,
"step": 15
},
{
"epoch": 0.012558869701726845,
"grad_norm": 0.3038957715034485,
"learning_rate": 9.882260596546312e-06,
"loss": 1.8644,
"step": 16
},
{
"epoch": 0.013343799058084773,
"grad_norm": 0.2970006465911865,
"learning_rate": 9.874411302982733e-06,
"loss": 1.8248,
"step": 17
},
{
"epoch": 0.0141287284144427,
"grad_norm": 0.2924885153770447,
"learning_rate": 9.866562009419153e-06,
"loss": 1.8896,
"step": 18
},
{
"epoch": 0.014913657770800628,
"grad_norm": 0.30948498845100403,
"learning_rate": 9.858712715855574e-06,
"loss": 2.0174,
"step": 19
},
{
"epoch": 0.015698587127158554,
"grad_norm": 0.27170419692993164,
"learning_rate": 9.850863422291994e-06,
"loss": 1.8595,
"step": 20
},
{
"epoch": 0.016483516483516484,
"grad_norm": 0.2529163360595703,
"learning_rate": 9.843014128728415e-06,
"loss": 1.8447,
"step": 21
},
{
"epoch": 0.01726844583987441,
"grad_norm": 0.26019564270973206,
"learning_rate": 9.835164835164835e-06,
"loss": 1.8919,
"step": 22
},
{
"epoch": 0.01805337519623234,
"grad_norm": 0.251017302274704,
"learning_rate": 9.827315541601256e-06,
"loss": 1.8662,
"step": 23
},
{
"epoch": 0.018838304552590265,
"grad_norm": 0.22893306612968445,
"learning_rate": 9.819466248037678e-06,
"loss": 1.7685,
"step": 24
},
{
"epoch": 0.019623233908948195,
"grad_norm": 0.22592955827713013,
"learning_rate": 9.811616954474098e-06,
"loss": 1.7604,
"step": 25
},
{
"epoch": 0.02040816326530612,
"grad_norm": 0.22802409529685974,
"learning_rate": 9.803767660910519e-06,
"loss": 1.8441,
"step": 26
},
{
"epoch": 0.02119309262166405,
"grad_norm": 0.21545954048633575,
"learning_rate": 9.795918367346939e-06,
"loss": 1.7258,
"step": 27
},
{
"epoch": 0.02197802197802198,
"grad_norm": 0.22040924429893494,
"learning_rate": 9.78806907378336e-06,
"loss": 1.7892,
"step": 28
},
{
"epoch": 0.022762951334379906,
"grad_norm": 0.2393616884946823,
"learning_rate": 9.780219780219781e-06,
"loss": 1.8538,
"step": 29
},
{
"epoch": 0.023547880690737835,
"grad_norm": 0.20232702791690826,
"learning_rate": 9.772370486656201e-06,
"loss": 1.7536,
"step": 30
},
{
"epoch": 0.02433281004709576,
"grad_norm": 0.2203434556722641,
"learning_rate": 9.764521193092623e-06,
"loss": 1.7088,
"step": 31
},
{
"epoch": 0.02511773940345369,
"grad_norm": 0.1975688636302948,
"learning_rate": 9.756671899529044e-06,
"loss": 1.7131,
"step": 32
},
{
"epoch": 0.025902668759811617,
"grad_norm": 0.19861450791358948,
"learning_rate": 9.748822605965464e-06,
"loss": 1.776,
"step": 33
},
{
"epoch": 0.026687598116169546,
"grad_norm": 0.19722306728363037,
"learning_rate": 9.740973312401885e-06,
"loss": 1.6889,
"step": 34
},
{
"epoch": 0.027472527472527472,
"grad_norm": 0.18340197205543518,
"learning_rate": 9.733124018838307e-06,
"loss": 1.6312,
"step": 35
},
{
"epoch": 0.0282574568288854,
"grad_norm": 0.20767861604690552,
"learning_rate": 9.725274725274726e-06,
"loss": 1.7293,
"step": 36
},
{
"epoch": 0.029042386185243328,
"grad_norm": 0.16855376958847046,
"learning_rate": 9.717425431711148e-06,
"loss": 1.5657,
"step": 37
},
{
"epoch": 0.029827315541601257,
"grad_norm": 0.19397929310798645,
"learning_rate": 9.709576138147567e-06,
"loss": 1.6843,
"step": 38
},
{
"epoch": 0.030612244897959183,
"grad_norm": 0.1965712457895279,
"learning_rate": 9.701726844583989e-06,
"loss": 1.7135,
"step": 39
},
{
"epoch": 0.03139717425431711,
"grad_norm": 0.18827113509178162,
"learning_rate": 9.693877551020408e-06,
"loss": 1.7002,
"step": 40
},
{
"epoch": 0.03218210361067504,
"grad_norm": 0.18982355296611786,
"learning_rate": 9.68602825745683e-06,
"loss": 1.6863,
"step": 41
},
{
"epoch": 0.03296703296703297,
"grad_norm": 0.18091963231563568,
"learning_rate": 9.67817896389325e-06,
"loss": 1.6784,
"step": 42
},
{
"epoch": 0.033751962323390894,
"grad_norm": 0.17879721522331238,
"learning_rate": 9.670329670329671e-06,
"loss": 1.6552,
"step": 43
},
{
"epoch": 0.03453689167974882,
"grad_norm": 0.19234947860240936,
"learning_rate": 9.66248037676609e-06,
"loss": 1.7857,
"step": 44
},
{
"epoch": 0.03532182103610675,
"grad_norm": 0.16198568046092987,
"learning_rate": 9.654631083202512e-06,
"loss": 1.5865,
"step": 45
},
{
"epoch": 0.03610675039246468,
"grad_norm": 0.16286341845989227,
"learning_rate": 9.646781789638933e-06,
"loss": 1.6461,
"step": 46
},
{
"epoch": 0.036891679748822605,
"grad_norm": 0.15107131004333496,
"learning_rate": 9.638932496075353e-06,
"loss": 1.6048,
"step": 47
},
{
"epoch": 0.03767660910518053,
"grad_norm": 0.16922970116138458,
"learning_rate": 9.631083202511775e-06,
"loss": 1.6644,
"step": 48
},
{
"epoch": 0.038461538461538464,
"grad_norm": 0.15843363106250763,
"learning_rate": 9.623233908948196e-06,
"loss": 1.5417,
"step": 49
},
{
"epoch": 0.03924646781789639,
"grad_norm": 0.17787528038024902,
"learning_rate": 9.615384615384616e-06,
"loss": 1.6869,
"step": 50
},
{
"epoch": 0.040031397174254316,
"grad_norm": 0.1719801276922226,
"learning_rate": 9.607535321821037e-06,
"loss": 1.6351,
"step": 51
},
{
"epoch": 0.04081632653061224,
"grad_norm": 0.1807231903076172,
"learning_rate": 9.599686028257459e-06,
"loss": 1.6697,
"step": 52
},
{
"epoch": 0.041601255886970175,
"grad_norm": 0.1894819140434265,
"learning_rate": 9.591836734693878e-06,
"loss": 1.6668,
"step": 53
},
{
"epoch": 0.0423861852433281,
"grad_norm": 0.16167454421520233,
"learning_rate": 9.5839874411303e-06,
"loss": 1.601,
"step": 54
},
{
"epoch": 0.04317111459968603,
"grad_norm": 0.15732350945472717,
"learning_rate": 9.576138147566721e-06,
"loss": 1.5291,
"step": 55
},
{
"epoch": 0.04395604395604396,
"grad_norm": 0.16725674271583557,
"learning_rate": 9.56828885400314e-06,
"loss": 1.6179,
"step": 56
},
{
"epoch": 0.044740973312401885,
"grad_norm": 0.1474035680294037,
"learning_rate": 9.560439560439562e-06,
"loss": 1.5644,
"step": 57
},
{
"epoch": 0.04552590266875981,
"grad_norm": 0.15364201366901398,
"learning_rate": 9.552590266875982e-06,
"loss": 1.6396,
"step": 58
},
{
"epoch": 0.04631083202511774,
"grad_norm": 0.19037100672721863,
"learning_rate": 9.544740973312403e-06,
"loss": 1.561,
"step": 59
},
{
"epoch": 0.04709576138147567,
"grad_norm": 0.14766716957092285,
"learning_rate": 9.536891679748823e-06,
"loss": 1.5349,
"step": 60
},
{
"epoch": 0.047880690737833596,
"grad_norm": 0.13931486010551453,
"learning_rate": 9.529042386185244e-06,
"loss": 1.4435,
"step": 61
},
{
"epoch": 0.04866562009419152,
"grad_norm": 0.1515316218137741,
"learning_rate": 9.521193092621664e-06,
"loss": 1.5309,
"step": 62
},
{
"epoch": 0.04945054945054945,
"grad_norm": 0.14389821887016296,
"learning_rate": 9.513343799058085e-06,
"loss": 1.4471,
"step": 63
},
{
"epoch": 0.05023547880690738,
"grad_norm": 0.14060775935649872,
"learning_rate": 9.505494505494505e-06,
"loss": 1.5074,
"step": 64
},
{
"epoch": 0.05102040816326531,
"grad_norm": 0.1455729603767395,
"learning_rate": 9.497645211930927e-06,
"loss": 1.5528,
"step": 65
},
{
"epoch": 0.05180533751962323,
"grad_norm": 0.14463943243026733,
"learning_rate": 9.489795918367348e-06,
"loss": 1.4809,
"step": 66
},
{
"epoch": 0.05259026687598116,
"grad_norm": 0.1522558331489563,
"learning_rate": 9.481946624803768e-06,
"loss": 1.4754,
"step": 67
},
{
"epoch": 0.05337519623233909,
"grad_norm": 0.1452169567346573,
"learning_rate": 9.474097331240189e-06,
"loss": 1.4867,
"step": 68
},
{
"epoch": 0.05416012558869702,
"grad_norm": 0.14983990788459778,
"learning_rate": 9.46624803767661e-06,
"loss": 1.4852,
"step": 69
},
{
"epoch": 0.054945054945054944,
"grad_norm": 0.15380005538463593,
"learning_rate": 9.45839874411303e-06,
"loss": 1.5105,
"step": 70
},
{
"epoch": 0.05572998430141287,
"grad_norm": 0.1510206013917923,
"learning_rate": 9.450549450549452e-06,
"loss": 1.5202,
"step": 71
},
{
"epoch": 0.0565149136577708,
"grad_norm": 0.1501176506280899,
"learning_rate": 9.442700156985873e-06,
"loss": 1.5119,
"step": 72
},
{
"epoch": 0.05729984301412873,
"grad_norm": 0.26818370819091797,
"learning_rate": 9.434850863422293e-06,
"loss": 1.5876,
"step": 73
},
{
"epoch": 0.058084772370486655,
"grad_norm": 0.1636001467704773,
"learning_rate": 9.427001569858714e-06,
"loss": 1.5482,
"step": 74
},
{
"epoch": 0.05886970172684458,
"grad_norm": 0.1485077142715454,
"learning_rate": 9.419152276295134e-06,
"loss": 1.5007,
"step": 75
},
{
"epoch": 0.059654631083202514,
"grad_norm": 0.14850012958049774,
"learning_rate": 9.411302982731555e-06,
"loss": 1.4424,
"step": 76
},
{
"epoch": 0.06043956043956044,
"grad_norm": 0.16702501475811005,
"learning_rate": 9.403453689167977e-06,
"loss": 1.5523,
"step": 77
},
{
"epoch": 0.061224489795918366,
"grad_norm": 0.15823277831077576,
"learning_rate": 9.395604395604396e-06,
"loss": 1.4958,
"step": 78
},
{
"epoch": 0.06200941915227629,
"grad_norm": 0.15339218080043793,
"learning_rate": 9.387755102040818e-06,
"loss": 1.4549,
"step": 79
},
{
"epoch": 0.06279434850863422,
"grad_norm": 0.15759187936782837,
"learning_rate": 9.379905808477237e-06,
"loss": 1.4164,
"step": 80
},
{
"epoch": 0.06357927786499215,
"grad_norm": 0.16237343847751617,
"learning_rate": 9.372056514913659e-06,
"loss": 1.5056,
"step": 81
},
{
"epoch": 0.06436420722135008,
"grad_norm": 0.145219624042511,
"learning_rate": 9.364207221350079e-06,
"loss": 1.4138,
"step": 82
},
{
"epoch": 0.065149136577708,
"grad_norm": 0.16004693508148193,
"learning_rate": 9.3563579277865e-06,
"loss": 1.4653,
"step": 83
},
{
"epoch": 0.06593406593406594,
"grad_norm": 0.17298448085784912,
"learning_rate": 9.34850863422292e-06,
"loss": 1.4232,
"step": 84
},
{
"epoch": 0.06671899529042387,
"grad_norm": 0.15541157126426697,
"learning_rate": 9.340659340659341e-06,
"loss": 1.4269,
"step": 85
},
{
"epoch": 0.06750392464678179,
"grad_norm": 0.1717188060283661,
"learning_rate": 9.33281004709576e-06,
"loss": 1.4827,
"step": 86
},
{
"epoch": 0.06828885400313972,
"grad_norm": 0.15374824404716492,
"learning_rate": 9.324960753532182e-06,
"loss": 1.4348,
"step": 87
},
{
"epoch": 0.06907378335949764,
"grad_norm": 0.16950677335262299,
"learning_rate": 9.317111459968604e-06,
"loss": 1.4779,
"step": 88
},
{
"epoch": 0.06985871271585557,
"grad_norm": 0.16479431092739105,
"learning_rate": 9.309262166405025e-06,
"loss": 1.4517,
"step": 89
},
{
"epoch": 0.0706436420722135,
"grad_norm": 0.16450008749961853,
"learning_rate": 9.301412872841445e-06,
"loss": 1.4352,
"step": 90
},
{
"epoch": 0.07142857142857142,
"grad_norm": 0.1584760844707489,
"learning_rate": 9.293563579277866e-06,
"loss": 1.3687,
"step": 91
},
{
"epoch": 0.07221350078492936,
"grad_norm": 0.17088983952999115,
"learning_rate": 9.285714285714288e-06,
"loss": 1.403,
"step": 92
},
{
"epoch": 0.07299843014128729,
"grad_norm": 0.16017332673072815,
"learning_rate": 9.277864992150707e-06,
"loss": 1.3504,
"step": 93
},
{
"epoch": 0.07378335949764521,
"grad_norm": 0.22771817445755005,
"learning_rate": 9.270015698587129e-06,
"loss": 1.4361,
"step": 94
},
{
"epoch": 0.07456828885400314,
"grad_norm": 0.1651688814163208,
"learning_rate": 9.262166405023548e-06,
"loss": 1.3933,
"step": 95
},
{
"epoch": 0.07535321821036106,
"grad_norm": 0.185661181807518,
"learning_rate": 9.25431711145997e-06,
"loss": 1.4446,
"step": 96
},
{
"epoch": 0.076138147566719,
"grad_norm": 0.17134390771389008,
"learning_rate": 9.24646781789639e-06,
"loss": 1.3812,
"step": 97
},
{
"epoch": 0.07692307692307693,
"grad_norm": 0.17040328681468964,
"learning_rate": 9.238618524332811e-06,
"loss": 1.393,
"step": 98
},
{
"epoch": 0.07770800627943485,
"grad_norm": 0.16197283565998077,
"learning_rate": 9.230769230769232e-06,
"loss": 1.3515,
"step": 99
},
{
"epoch": 0.07849293563579278,
"grad_norm": 0.18512940406799316,
"learning_rate": 9.222919937205652e-06,
"loss": 1.3884,
"step": 100
},
{
"epoch": 0.07927786499215071,
"grad_norm": 0.2023470401763916,
"learning_rate": 9.215070643642073e-06,
"loss": 1.4122,
"step": 101
},
{
"epoch": 0.08006279434850863,
"grad_norm": 0.1792641282081604,
"learning_rate": 9.207221350078493e-06,
"loss": 1.3936,
"step": 102
},
{
"epoch": 0.08084772370486656,
"grad_norm": 0.18946573138237,
"learning_rate": 9.199372056514915e-06,
"loss": 1.3314,
"step": 103
},
{
"epoch": 0.08163265306122448,
"grad_norm": 0.17217950522899628,
"learning_rate": 9.191522762951334e-06,
"loss": 1.3624,
"step": 104
},
{
"epoch": 0.08241758241758242,
"grad_norm": 0.1832340806722641,
"learning_rate": 9.183673469387756e-06,
"loss": 1.4058,
"step": 105
},
{
"epoch": 0.08320251177394035,
"grad_norm": 0.1757064312696457,
"learning_rate": 9.175824175824175e-06,
"loss": 1.3566,
"step": 106
},
{
"epoch": 0.08398744113029827,
"grad_norm": 0.17505265772342682,
"learning_rate": 9.167974882260597e-06,
"loss": 1.3217,
"step": 107
},
{
"epoch": 0.0847723704866562,
"grad_norm": 0.1719823032617569,
"learning_rate": 9.160125588697018e-06,
"loss": 1.3686,
"step": 108
},
{
"epoch": 0.08555729984301413,
"grad_norm": 0.16589918732643127,
"learning_rate": 9.15227629513344e-06,
"loss": 1.3998,
"step": 109
},
{
"epoch": 0.08634222919937205,
"grad_norm": 0.18314798176288605,
"learning_rate": 9.14442700156986e-06,
"loss": 1.399,
"step": 110
},
{
"epoch": 0.08712715855572999,
"grad_norm": 0.18713513016700745,
"learning_rate": 9.13657770800628e-06,
"loss": 1.4139,
"step": 111
},
{
"epoch": 0.08791208791208792,
"grad_norm": 0.16822576522827148,
"learning_rate": 9.128728414442702e-06,
"loss": 1.3247,
"step": 112
},
{
"epoch": 0.08869701726844584,
"grad_norm": 0.16645370423793793,
"learning_rate": 9.120879120879122e-06,
"loss": 1.3382,
"step": 113
},
{
"epoch": 0.08948194662480377,
"grad_norm": 0.16858340799808502,
"learning_rate": 9.113029827315543e-06,
"loss": 1.366,
"step": 114
},
{
"epoch": 0.09026687598116169,
"grad_norm": 0.15871913731098175,
"learning_rate": 9.105180533751963e-06,
"loss": 1.3446,
"step": 115
},
{
"epoch": 0.09105180533751962,
"grad_norm": 0.17642484605312347,
"learning_rate": 9.097331240188384e-06,
"loss": 1.4424,
"step": 116
},
{
"epoch": 0.09183673469387756,
"grad_norm": 0.16072145104408264,
"learning_rate": 9.089481946624804e-06,
"loss": 1.3247,
"step": 117
},
{
"epoch": 0.09262166405023547,
"grad_norm": 0.1545998454093933,
"learning_rate": 9.081632653061225e-06,
"loss": 1.3285,
"step": 118
},
{
"epoch": 0.09340659340659341,
"grad_norm": 0.15946722030639648,
"learning_rate": 9.073783359497645e-06,
"loss": 1.3123,
"step": 119
},
{
"epoch": 0.09419152276295134,
"grad_norm": 0.16009531915187836,
"learning_rate": 9.065934065934067e-06,
"loss": 1.3456,
"step": 120
},
{
"epoch": 0.09497645211930926,
"grad_norm": 0.15958304703235626,
"learning_rate": 9.058084772370488e-06,
"loss": 1.3039,
"step": 121
},
{
"epoch": 0.09576138147566719,
"grad_norm": 0.14737734198570251,
"learning_rate": 9.050235478806908e-06,
"loss": 1.3443,
"step": 122
},
{
"epoch": 0.09654631083202511,
"grad_norm": 0.15155759453773499,
"learning_rate": 9.042386185243329e-06,
"loss": 1.3459,
"step": 123
},
{
"epoch": 0.09733124018838304,
"grad_norm": 0.15477906167507172,
"learning_rate": 9.034536891679749e-06,
"loss": 1.3314,
"step": 124
},
{
"epoch": 0.09811616954474098,
"grad_norm": 0.14952191710472107,
"learning_rate": 9.02668759811617e-06,
"loss": 1.2907,
"step": 125
},
{
"epoch": 0.0989010989010989,
"grad_norm": 0.14685417711734772,
"learning_rate": 9.01883830455259e-06,
"loss": 1.3356,
"step": 126
},
{
"epoch": 0.09968602825745683,
"grad_norm": 0.1500052958726883,
"learning_rate": 9.010989010989011e-06,
"loss": 1.2724,
"step": 127
},
{
"epoch": 0.10047095761381476,
"grad_norm": 0.1418740600347519,
"learning_rate": 9.003139717425433e-06,
"loss": 1.3153,
"step": 128
},
{
"epoch": 0.10125588697017268,
"grad_norm": 0.15048982203006744,
"learning_rate": 8.995290423861854e-06,
"loss": 1.317,
"step": 129
},
{
"epoch": 0.10204081632653061,
"grad_norm": 0.1451820433139801,
"learning_rate": 8.987441130298274e-06,
"loss": 1.2907,
"step": 130
},
{
"epoch": 0.10282574568288853,
"grad_norm": 0.1553954780101776,
"learning_rate": 8.979591836734695e-06,
"loss": 1.3496,
"step": 131
},
{
"epoch": 0.10361067503924647,
"grad_norm": 0.13944025337696075,
"learning_rate": 8.971742543171117e-06,
"loss": 1.2967,
"step": 132
},
{
"epoch": 0.1043956043956044,
"grad_norm": 0.1505189687013626,
"learning_rate": 8.963893249607536e-06,
"loss": 1.242,
"step": 133
},
{
"epoch": 0.10518053375196232,
"grad_norm": 0.17538291215896606,
"learning_rate": 8.956043956043958e-06,
"loss": 1.3652,
"step": 134
},
{
"epoch": 0.10596546310832025,
"grad_norm": 0.1521396040916443,
"learning_rate": 8.948194662480377e-06,
"loss": 1.303,
"step": 135
},
{
"epoch": 0.10675039246467818,
"grad_norm": 0.14989960193634033,
"learning_rate": 8.940345368916799e-06,
"loss": 1.3348,
"step": 136
},
{
"epoch": 0.1075353218210361,
"grad_norm": 0.14487774670124054,
"learning_rate": 8.932496075353219e-06,
"loss": 1.307,
"step": 137
},
{
"epoch": 0.10832025117739404,
"grad_norm": 0.1494406759738922,
"learning_rate": 8.92464678178964e-06,
"loss": 1.3126,
"step": 138
},
{
"epoch": 0.10910518053375197,
"grad_norm": 0.16444607079029083,
"learning_rate": 8.91679748822606e-06,
"loss": 1.2694,
"step": 139
},
{
"epoch": 0.10989010989010989,
"grad_norm": 0.15526680648326874,
"learning_rate": 8.908948194662481e-06,
"loss": 1.2646,
"step": 140
},
{
"epoch": 0.11067503924646782,
"grad_norm": 0.15668538212776184,
"learning_rate": 8.9010989010989e-06,
"loss": 1.2587,
"step": 141
},
{
"epoch": 0.11145996860282574,
"grad_norm": 0.14996470510959625,
"learning_rate": 8.893249607535322e-06,
"loss": 1.3122,
"step": 142
},
{
"epoch": 0.11224489795918367,
"grad_norm": 0.14894387125968933,
"learning_rate": 8.885400313971744e-06,
"loss": 1.2909,
"step": 143
},
{
"epoch": 0.1130298273155416,
"grad_norm": 0.18087385594844818,
"learning_rate": 8.877551020408163e-06,
"loss": 1.3161,
"step": 144
},
{
"epoch": 0.11381475667189953,
"grad_norm": 0.15965577960014343,
"learning_rate": 8.869701726844585e-06,
"loss": 1.2683,
"step": 145
},
{
"epoch": 0.11459968602825746,
"grad_norm": 0.15005330741405487,
"learning_rate": 8.861852433281004e-06,
"loss": 1.3083,
"step": 146
},
{
"epoch": 0.11538461538461539,
"grad_norm": 0.14711208641529083,
"learning_rate": 8.854003139717426e-06,
"loss": 1.3019,
"step": 147
},
{
"epoch": 0.11616954474097331,
"grad_norm": 0.14721107482910156,
"learning_rate": 8.846153846153847e-06,
"loss": 1.2469,
"step": 148
},
{
"epoch": 0.11695447409733124,
"grad_norm": 0.16075782477855682,
"learning_rate": 8.838304552590269e-06,
"loss": 1.2696,
"step": 149
},
{
"epoch": 0.11773940345368916,
"grad_norm": 0.14981156587600708,
"learning_rate": 8.830455259026688e-06,
"loss": 1.2416,
"step": 150
},
{
"epoch": 0.1185243328100471,
"grad_norm": 0.15048524737358093,
"learning_rate": 8.82260596546311e-06,
"loss": 1.235,
"step": 151
},
{
"epoch": 0.11930926216640503,
"grad_norm": 0.1528954803943634,
"learning_rate": 8.81475667189953e-06,
"loss": 1.3038,
"step": 152
},
{
"epoch": 0.12009419152276295,
"grad_norm": 0.15498745441436768,
"learning_rate": 8.80690737833595e-06,
"loss": 1.3014,
"step": 153
},
{
"epoch": 0.12087912087912088,
"grad_norm": 0.15970808267593384,
"learning_rate": 8.799058084772372e-06,
"loss": 1.2824,
"step": 154
},
{
"epoch": 0.12166405023547881,
"grad_norm": 0.15735112130641937,
"learning_rate": 8.791208791208792e-06,
"loss": 1.2684,
"step": 155
},
{
"epoch": 0.12244897959183673,
"grad_norm": 0.15894393622875214,
"learning_rate": 8.783359497645213e-06,
"loss": 1.2283,
"step": 156
},
{
"epoch": 0.12323390894819466,
"grad_norm": 0.16271716356277466,
"learning_rate": 8.775510204081633e-06,
"loss": 1.2974,
"step": 157
},
{
"epoch": 0.12401883830455258,
"grad_norm": 0.15001676976680756,
"learning_rate": 8.767660910518054e-06,
"loss": 1.2688,
"step": 158
},
{
"epoch": 0.12480376766091052,
"grad_norm": 0.1491970270872116,
"learning_rate": 8.759811616954474e-06,
"loss": 1.2482,
"step": 159
},
{
"epoch": 0.12558869701726844,
"grad_norm": 0.15794126689434052,
"learning_rate": 8.751962323390896e-06,
"loss": 1.2605,
"step": 160
},
{
"epoch": 0.12637362637362637,
"grad_norm": 0.14844392240047455,
"learning_rate": 8.744113029827315e-06,
"loss": 1.2332,
"step": 161
},
{
"epoch": 0.1271585557299843,
"grad_norm": 0.16946828365325928,
"learning_rate": 8.736263736263737e-06,
"loss": 1.3136,
"step": 162
},
{
"epoch": 0.12794348508634223,
"grad_norm": 0.14547237753868103,
"learning_rate": 8.728414442700158e-06,
"loss": 1.2409,
"step": 163
},
{
"epoch": 0.12872841444270017,
"grad_norm": 0.15002243220806122,
"learning_rate": 8.720565149136578e-06,
"loss": 1.1916,
"step": 164
},
{
"epoch": 0.12951334379905807,
"grad_norm": 0.1735372096300125,
"learning_rate": 8.712715855573e-06,
"loss": 1.2371,
"step": 165
},
{
"epoch": 0.130298273155416,
"grad_norm": 0.18110059201717377,
"learning_rate": 8.704866562009419e-06,
"loss": 1.3246,
"step": 166
},
{
"epoch": 0.13108320251177394,
"grad_norm": 0.16027387976646423,
"learning_rate": 8.69701726844584e-06,
"loss": 1.2489,
"step": 167
},
{
"epoch": 0.13186813186813187,
"grad_norm": 0.15014226734638214,
"learning_rate": 8.689167974882262e-06,
"loss": 1.2068,
"step": 168
},
{
"epoch": 0.1326530612244898,
"grad_norm": 0.15863986313343048,
"learning_rate": 8.681318681318681e-06,
"loss": 1.2825,
"step": 169
},
{
"epoch": 0.13343799058084774,
"grad_norm": 0.17867471277713776,
"learning_rate": 8.673469387755103e-06,
"loss": 1.2607,
"step": 170
},
{
"epoch": 0.13422291993720564,
"grad_norm": 0.1596807986497879,
"learning_rate": 8.665620094191524e-06,
"loss": 1.2067,
"step": 171
},
{
"epoch": 0.13500784929356358,
"grad_norm": 0.1603277176618576,
"learning_rate": 8.657770800627944e-06,
"loss": 1.2414,
"step": 172
},
{
"epoch": 0.1357927786499215,
"grad_norm": 0.26285287737846375,
"learning_rate": 8.649921507064365e-06,
"loss": 1.1949,
"step": 173
},
{
"epoch": 0.13657770800627944,
"grad_norm": 0.15682700276374817,
"learning_rate": 8.642072213500787e-06,
"loss": 1.1832,
"step": 174
},
{
"epoch": 0.13736263736263737,
"grad_norm": 0.16141754388809204,
"learning_rate": 8.634222919937206e-06,
"loss": 1.2334,
"step": 175
},
{
"epoch": 0.13814756671899528,
"grad_norm": 0.18615436553955078,
"learning_rate": 8.626373626373628e-06,
"loss": 1.256,
"step": 176
},
{
"epoch": 0.1389324960753532,
"grad_norm": 0.15746115148067474,
"learning_rate": 8.618524332810048e-06,
"loss": 1.2267,
"step": 177
},
{
"epoch": 0.13971742543171115,
"grad_norm": 0.16463595628738403,
"learning_rate": 8.610675039246469e-06,
"loss": 1.2638,
"step": 178
},
{
"epoch": 0.14050235478806908,
"grad_norm": 0.15357612073421478,
"learning_rate": 8.602825745682889e-06,
"loss": 1.2262,
"step": 179
},
{
"epoch": 0.141287284144427,
"grad_norm": 0.16407904028892517,
"learning_rate": 8.59497645211931e-06,
"loss": 1.1631,
"step": 180
},
{
"epoch": 0.14207221350078492,
"grad_norm": 0.15864725410938263,
"learning_rate": 8.58712715855573e-06,
"loss": 1.1992,
"step": 181
},
{
"epoch": 0.14285714285714285,
"grad_norm": 0.16168276965618134,
"learning_rate": 8.579277864992151e-06,
"loss": 1.2385,
"step": 182
},
{
"epoch": 0.14364207221350078,
"grad_norm": 0.15585263073444366,
"learning_rate": 8.571428571428571e-06,
"loss": 1.2105,
"step": 183
},
{
"epoch": 0.14442700156985872,
"grad_norm": 0.15923231840133667,
"learning_rate": 8.563579277864992e-06,
"loss": 1.2208,
"step": 184
},
{
"epoch": 0.14521193092621665,
"grad_norm": 0.1669979989528656,
"learning_rate": 8.555729984301414e-06,
"loss": 1.2528,
"step": 185
},
{
"epoch": 0.14599686028257458,
"grad_norm": 0.1686784327030182,
"learning_rate": 8.547880690737833e-06,
"loss": 1.2136,
"step": 186
},
{
"epoch": 0.14678178963893249,
"grad_norm": 0.16007350385189056,
"learning_rate": 8.540031397174255e-06,
"loss": 1.2197,
"step": 187
},
{
"epoch": 0.14756671899529042,
"grad_norm": 0.15759383141994476,
"learning_rate": 8.532182103610676e-06,
"loss": 1.2286,
"step": 188
},
{
"epoch": 0.14835164835164835,
"grad_norm": 0.1621437519788742,
"learning_rate": 8.524332810047096e-06,
"loss": 1.1928,
"step": 189
},
{
"epoch": 0.14913657770800628,
"grad_norm": 0.1673436015844345,
"learning_rate": 8.516483516483517e-06,
"loss": 1.2709,
"step": 190
},
{
"epoch": 0.14992150706436422,
"grad_norm": 0.15842801332473755,
"learning_rate": 8.508634222919939e-06,
"loss": 1.213,
"step": 191
},
{
"epoch": 0.15070643642072212,
"grad_norm": 0.16606052219867706,
"learning_rate": 8.500784929356358e-06,
"loss": 1.2274,
"step": 192
},
{
"epoch": 0.15149136577708006,
"grad_norm": 0.24337609112262726,
"learning_rate": 8.49293563579278e-06,
"loss": 1.2763,
"step": 193
},
{
"epoch": 0.152276295133438,
"grad_norm": 0.1524026244878769,
"learning_rate": 8.4850863422292e-06,
"loss": 1.1981,
"step": 194
},
{
"epoch": 0.15306122448979592,
"grad_norm": 0.16204509139060974,
"learning_rate": 8.477237048665621e-06,
"loss": 1.2361,
"step": 195
},
{
"epoch": 0.15384615384615385,
"grad_norm": 0.16638584434986115,
"learning_rate": 8.469387755102042e-06,
"loss": 1.2309,
"step": 196
},
{
"epoch": 0.1546310832025118,
"grad_norm": 0.16714318096637726,
"learning_rate": 8.461538461538462e-06,
"loss": 1.2692,
"step": 197
},
{
"epoch": 0.1554160125588697,
"grad_norm": 0.168153315782547,
"learning_rate": 8.453689167974884e-06,
"loss": 1.2476,
"step": 198
},
{
"epoch": 0.15620094191522763,
"grad_norm": 0.1681162267923355,
"learning_rate": 8.445839874411303e-06,
"loss": 1.2504,
"step": 199
},
{
"epoch": 0.15698587127158556,
"grad_norm": 0.16602487862110138,
"learning_rate": 8.437990580847725e-06,
"loss": 1.2636,
"step": 200
},
{
"epoch": 0.1577708006279435,
"grad_norm": 0.1714046150445938,
"learning_rate": 8.430141287284144e-06,
"loss": 1.2159,
"step": 201
},
{
"epoch": 0.15855572998430142,
"grad_norm": 0.168313130736351,
"learning_rate": 8.422291993720566e-06,
"loss": 1.1932,
"step": 202
},
{
"epoch": 0.15934065934065933,
"grad_norm": 0.1813940554857254,
"learning_rate": 8.414442700156985e-06,
"loss": 1.2291,
"step": 203
},
{
"epoch": 0.16012558869701726,
"grad_norm": 0.16745160520076752,
"learning_rate": 8.406593406593407e-06,
"loss": 1.2088,
"step": 204
},
{
"epoch": 0.1609105180533752,
"grad_norm": 0.1752898395061493,
"learning_rate": 8.398744113029828e-06,
"loss": 1.2324,
"step": 205
},
{
"epoch": 0.16169544740973313,
"grad_norm": 0.1678934097290039,
"learning_rate": 8.390894819466248e-06,
"loss": 1.2545,
"step": 206
},
{
"epoch": 0.16248037676609106,
"grad_norm": 0.1756390780210495,
"learning_rate": 8.38304552590267e-06,
"loss": 1.2421,
"step": 207
},
{
"epoch": 0.16326530612244897,
"grad_norm": 0.17289415001869202,
"learning_rate": 8.37519623233909e-06,
"loss": 1.2063,
"step": 208
},
{
"epoch": 0.1640502354788069,
"grad_norm": 0.17906337976455688,
"learning_rate": 8.36734693877551e-06,
"loss": 1.2598,
"step": 209
},
{
"epoch": 0.16483516483516483,
"grad_norm": 0.17102067172527313,
"learning_rate": 8.359497645211932e-06,
"loss": 1.1696,
"step": 210
},
{
"epoch": 0.16562009419152277,
"grad_norm": 0.16523365676403046,
"learning_rate": 8.351648351648353e-06,
"loss": 1.1769,
"step": 211
},
{
"epoch": 0.1664050235478807,
"grad_norm": 0.1749919205904007,
"learning_rate": 8.343799058084773e-06,
"loss": 1.1784,
"step": 212
},
{
"epoch": 0.16718995290423863,
"grad_norm": 0.17963068187236786,
"learning_rate": 8.335949764521194e-06,
"loss": 1.1859,
"step": 213
},
{
"epoch": 0.16797488226059654,
"grad_norm": 0.17333124577999115,
"learning_rate": 8.328100470957614e-06,
"loss": 1.2148,
"step": 214
},
{
"epoch": 0.16875981161695447,
"grad_norm": 0.17331229150295258,
"learning_rate": 8.320251177394036e-06,
"loss": 1.2404,
"step": 215
},
{
"epoch": 0.1695447409733124,
"grad_norm": 0.23610031604766846,
"learning_rate": 8.312401883830455e-06,
"loss": 1.222,
"step": 216
},
{
"epoch": 0.17032967032967034,
"grad_norm": 0.19044527411460876,
"learning_rate": 8.304552590266877e-06,
"loss": 1.1923,
"step": 217
},
{
"epoch": 0.17111459968602827,
"grad_norm": 0.16924557089805603,
"learning_rate": 8.296703296703298e-06,
"loss": 1.2481,
"step": 218
},
{
"epoch": 0.17189952904238617,
"grad_norm": 0.17461296916007996,
"learning_rate": 8.288854003139718e-06,
"loss": 1.204,
"step": 219
},
{
"epoch": 0.1726844583987441,
"grad_norm": 0.16900260746479034,
"learning_rate": 8.281004709576139e-06,
"loss": 1.158,
"step": 220
},
{
"epoch": 0.17346938775510204,
"grad_norm": 0.1738884598016739,
"learning_rate": 8.273155416012559e-06,
"loss": 1.1835,
"step": 221
},
{
"epoch": 0.17425431711145997,
"grad_norm": 0.18512043356895447,
"learning_rate": 8.26530612244898e-06,
"loss": 1.202,
"step": 222
},
{
"epoch": 0.1750392464678179,
"grad_norm": 0.15882278978824615,
"learning_rate": 8.2574568288854e-06,
"loss": 1.1717,
"step": 223
},
{
"epoch": 0.17582417582417584,
"grad_norm": 0.1748121678829193,
"learning_rate": 8.249607535321821e-06,
"loss": 1.2407,
"step": 224
},
{
"epoch": 0.17660910518053374,
"grad_norm": 0.17194059491157532,
"learning_rate": 8.241758241758243e-06,
"loss": 1.2104,
"step": 225
},
{
"epoch": 0.17739403453689168,
"grad_norm": 0.17927075922489166,
"learning_rate": 8.233908948194662e-06,
"loss": 1.1295,
"step": 226
},
{
"epoch": 0.1781789638932496,
"grad_norm": 0.17285114526748657,
"learning_rate": 8.226059654631084e-06,
"loss": 1.1364,
"step": 227
},
{
"epoch": 0.17896389324960754,
"grad_norm": 0.18146753311157227,
"learning_rate": 8.218210361067505e-06,
"loss": 1.2123,
"step": 228
},
{
"epoch": 0.17974882260596547,
"grad_norm": 0.17958636581897736,
"learning_rate": 8.210361067503925e-06,
"loss": 1.2155,
"step": 229
},
{
"epoch": 0.18053375196232338,
"grad_norm": 0.1809559315443039,
"learning_rate": 8.202511773940346e-06,
"loss": 1.2193,
"step": 230
},
{
"epoch": 0.1813186813186813,
"grad_norm": 0.17428240180015564,
"learning_rate": 8.194662480376768e-06,
"loss": 1.1756,
"step": 231
},
{
"epoch": 0.18210361067503925,
"grad_norm": 0.16993722319602966,
"learning_rate": 8.186813186813188e-06,
"loss": 1.1373,
"step": 232
},
{
"epoch": 0.18288854003139718,
"grad_norm": 0.16454678773880005,
"learning_rate": 8.178963893249609e-06,
"loss": 1.1778,
"step": 233
},
{
"epoch": 0.1836734693877551,
"grad_norm": 0.19709423184394836,
"learning_rate": 8.171114599686029e-06,
"loss": 1.1672,
"step": 234
},
{
"epoch": 0.18445839874411302,
"grad_norm": 0.17396849393844604,
"learning_rate": 8.16326530612245e-06,
"loss": 1.1614,
"step": 235
},
{
"epoch": 0.18524332810047095,
"grad_norm": 0.17194689810276031,
"learning_rate": 8.15541601255887e-06,
"loss": 1.1568,
"step": 236
},
{
"epoch": 0.18602825745682888,
"grad_norm": 0.17015020549297333,
"learning_rate": 8.147566718995291e-06,
"loss": 1.1737,
"step": 237
},
{
"epoch": 0.18681318681318682,
"grad_norm": 0.181587353348732,
"learning_rate": 8.139717425431711e-06,
"loss": 1.1874,
"step": 238
},
{
"epoch": 0.18759811616954475,
"grad_norm": 0.17715583741664886,
"learning_rate": 8.131868131868132e-06,
"loss": 1.2042,
"step": 239
},
{
"epoch": 0.18838304552590268,
"grad_norm": 0.18029291927814484,
"learning_rate": 8.124018838304554e-06,
"loss": 1.2087,
"step": 240
},
{
"epoch": 0.1891679748822606,
"grad_norm": 0.1827882081270218,
"learning_rate": 8.116169544740973e-06,
"loss": 1.1878,
"step": 241
},
{
"epoch": 0.18995290423861852,
"grad_norm": 0.18994055688381195,
"learning_rate": 8.108320251177395e-06,
"loss": 1.2756,
"step": 242
},
{
"epoch": 0.19073783359497645,
"grad_norm": 0.18137842416763306,
"learning_rate": 8.100470957613814e-06,
"loss": 1.1633,
"step": 243
},
{
"epoch": 0.19152276295133439,
"grad_norm": 0.18730993568897247,
"learning_rate": 8.092621664050236e-06,
"loss": 1.1659,
"step": 244
},
{
"epoch": 0.19230769230769232,
"grad_norm": 0.17532621324062347,
"learning_rate": 8.084772370486657e-06,
"loss": 1.212,
"step": 245
},
{
"epoch": 0.19309262166405022,
"grad_norm": 0.18628445267677307,
"learning_rate": 8.076923076923077e-06,
"loss": 1.1684,
"step": 246
},
{
"epoch": 0.19387755102040816,
"grad_norm": 0.1802094727754593,
"learning_rate": 8.069073783359498e-06,
"loss": 1.2242,
"step": 247
},
{
"epoch": 0.1946624803767661,
"grad_norm": 0.18173186480998993,
"learning_rate": 8.06122448979592e-06,
"loss": 1.2428,
"step": 248
},
{
"epoch": 0.19544740973312402,
"grad_norm": 0.17413657903671265,
"learning_rate": 8.05337519623234e-06,
"loss": 1.1303,
"step": 249
},
{
"epoch": 0.19623233908948196,
"grad_norm": 0.18897269666194916,
"learning_rate": 8.045525902668761e-06,
"loss": 1.2011,
"step": 250
},
{
"epoch": 0.1970172684458399,
"grad_norm": 0.18273556232452393,
"learning_rate": 8.037676609105182e-06,
"loss": 1.195,
"step": 251
},
{
"epoch": 0.1978021978021978,
"grad_norm": 0.17363931238651276,
"learning_rate": 8.029827315541602e-06,
"loss": 1.151,
"step": 252
},
{
"epoch": 0.19858712715855573,
"grad_norm": 0.19355787336826324,
"learning_rate": 8.021978021978023e-06,
"loss": 1.1883,
"step": 253
},
{
"epoch": 0.19937205651491366,
"grad_norm": 0.1852579414844513,
"learning_rate": 8.014128728414443e-06,
"loss": 1.1829,
"step": 254
},
{
"epoch": 0.2001569858712716,
"grad_norm": 0.18897081911563873,
"learning_rate": 8.006279434850865e-06,
"loss": 1.1633,
"step": 255
},
{
"epoch": 0.20094191522762953,
"grad_norm": 0.177345871925354,
"learning_rate": 7.998430141287284e-06,
"loss": 1.1543,
"step": 256
},
{
"epoch": 0.20172684458398743,
"grad_norm": 0.19197995960712433,
"learning_rate": 7.990580847723706e-06,
"loss": 1.1124,
"step": 257
},
{
"epoch": 0.20251177394034536,
"grad_norm": 0.1918957382440567,
"learning_rate": 7.982731554160125e-06,
"loss": 1.2175,
"step": 258
},
{
"epoch": 0.2032967032967033,
"grad_norm": 0.3751870095729828,
"learning_rate": 7.974882260596547e-06,
"loss": 1.208,
"step": 259
},
{
"epoch": 0.20408163265306123,
"grad_norm": 0.19366663694381714,
"learning_rate": 7.967032967032966e-06,
"loss": 1.1758,
"step": 260
},
{
"epoch": 0.20486656200941916,
"grad_norm": 0.1843341588973999,
"learning_rate": 7.959183673469388e-06,
"loss": 1.1385,
"step": 261
},
{
"epoch": 0.20565149136577707,
"grad_norm": 0.17850859463214874,
"learning_rate": 7.95133437990581e-06,
"loss": 1.1707,
"step": 262
},
{
"epoch": 0.206436420722135,
"grad_norm": 0.21179649233818054,
"learning_rate": 7.943485086342229e-06,
"loss": 1.2079,
"step": 263
},
{
"epoch": 0.20722135007849293,
"grad_norm": 0.21911092102527618,
"learning_rate": 7.93563579277865e-06,
"loss": 1.2071,
"step": 264
},
{
"epoch": 0.20800627943485087,
"grad_norm": 0.1800030916929245,
"learning_rate": 7.927786499215072e-06,
"loss": 1.1897,
"step": 265
},
{
"epoch": 0.2087912087912088,
"grad_norm": 0.20178881287574768,
"learning_rate": 7.919937205651492e-06,
"loss": 1.1307,
"step": 266
},
{
"epoch": 0.20957613814756673,
"grad_norm": 0.18904000520706177,
"learning_rate": 7.912087912087913e-06,
"loss": 1.1234,
"step": 267
},
{
"epoch": 0.21036106750392464,
"grad_norm": 0.18753568828105927,
"learning_rate": 7.904238618524334e-06,
"loss": 1.1312,
"step": 268
},
{
"epoch": 0.21114599686028257,
"grad_norm": 0.18907895684242249,
"learning_rate": 7.896389324960754e-06,
"loss": 1.2075,
"step": 269
},
{
"epoch": 0.2119309262166405,
"grad_norm": 0.20011885464191437,
"learning_rate": 7.888540031397175e-06,
"loss": 1.1175,
"step": 270
},
{
"epoch": 0.21271585557299844,
"grad_norm": 0.1855919063091278,
"learning_rate": 7.880690737833597e-06,
"loss": 1.1581,
"step": 271
},
{
"epoch": 0.21350078492935637,
"grad_norm": 0.18904899060726166,
"learning_rate": 7.872841444270017e-06,
"loss": 1.1513,
"step": 272
},
{
"epoch": 0.21428571428571427,
"grad_norm": 0.18176890909671783,
"learning_rate": 7.864992150706438e-06,
"loss": 1.1448,
"step": 273
},
{
"epoch": 0.2150706436420722,
"grad_norm": 0.18389371037483215,
"learning_rate": 7.857142857142858e-06,
"loss": 1.1695,
"step": 274
},
{
"epoch": 0.21585557299843014,
"grad_norm": 0.1845601201057434,
"learning_rate": 7.849293563579279e-06,
"loss": 1.1295,
"step": 275
},
{
"epoch": 0.21664050235478807,
"grad_norm": 0.2147328108549118,
"learning_rate": 7.841444270015699e-06,
"loss": 1.1369,
"step": 276
},
{
"epoch": 0.217425431711146,
"grad_norm": 0.20094560086727142,
"learning_rate": 7.83359497645212e-06,
"loss": 1.1666,
"step": 277
},
{
"epoch": 0.21821036106750394,
"grad_norm": 0.1994454562664032,
"learning_rate": 7.82574568288854e-06,
"loss": 1.171,
"step": 278
},
{
"epoch": 0.21899529042386184,
"grad_norm": 0.19250410795211792,
"learning_rate": 7.817896389324961e-06,
"loss": 1.1709,
"step": 279
},
{
"epoch": 0.21978021978021978,
"grad_norm": 0.20511236786842346,
"learning_rate": 7.810047095761381e-06,
"loss": 1.1788,
"step": 280
},
{
"epoch": 0.2205651491365777,
"grad_norm": 0.2156459391117096,
"learning_rate": 7.802197802197802e-06,
"loss": 1.1543,
"step": 281
},
{
"epoch": 0.22135007849293564,
"grad_norm": 0.18635962903499603,
"learning_rate": 7.794348508634224e-06,
"loss": 1.1375,
"step": 282
},
{
"epoch": 0.22213500784929358,
"grad_norm": 0.1912240833044052,
"learning_rate": 7.786499215070644e-06,
"loss": 1.1901,
"step": 283
},
{
"epoch": 0.22291993720565148,
"grad_norm": 0.19378046691417694,
"learning_rate": 7.778649921507065e-06,
"loss": 1.2012,
"step": 284
},
{
"epoch": 0.2237048665620094,
"grad_norm": 0.19641022384166718,
"learning_rate": 7.770800627943486e-06,
"loss": 1.1817,
"step": 285
},
{
"epoch": 0.22448979591836735,
"grad_norm": 0.1922275871038437,
"learning_rate": 7.762951334379906e-06,
"loss": 1.182,
"step": 286
},
{
"epoch": 0.22527472527472528,
"grad_norm": 0.19869813323020935,
"learning_rate": 7.755102040816327e-06,
"loss": 1.1975,
"step": 287
},
{
"epoch": 0.2260596546310832,
"grad_norm": 0.183540940284729,
"learning_rate": 7.747252747252749e-06,
"loss": 1.2028,
"step": 288
},
{
"epoch": 0.22684458398744112,
"grad_norm": 0.20018337666988373,
"learning_rate": 7.739403453689169e-06,
"loss": 1.2147,
"step": 289
},
{
"epoch": 0.22762951334379905,
"grad_norm": 0.2085750699043274,
"learning_rate": 7.73155416012559e-06,
"loss": 1.1198,
"step": 290
},
{
"epoch": 0.22841444270015698,
"grad_norm": 0.19977134466171265,
"learning_rate": 7.72370486656201e-06,
"loss": 1.1528,
"step": 291
},
{
"epoch": 0.22919937205651492,
"grad_norm": 0.19265629351139069,
"learning_rate": 7.715855572998431e-06,
"loss": 1.1401,
"step": 292
},
{
"epoch": 0.22998430141287285,
"grad_norm": 0.191536083817482,
"learning_rate": 7.708006279434852e-06,
"loss": 1.1556,
"step": 293
},
{
"epoch": 0.23076923076923078,
"grad_norm": 0.2022550255060196,
"learning_rate": 7.700156985871272e-06,
"loss": 1.0994,
"step": 294
},
{
"epoch": 0.2315541601255887,
"grad_norm": 0.1965666264295578,
"learning_rate": 7.692307692307694e-06,
"loss": 1.0851,
"step": 295
},
{
"epoch": 0.23233908948194662,
"grad_norm": 0.23619407415390015,
"learning_rate": 7.684458398744113e-06,
"loss": 1.2371,
"step": 296
},
{
"epoch": 0.23312401883830455,
"grad_norm": 0.18914885818958282,
"learning_rate": 7.676609105180535e-06,
"loss": 1.1038,
"step": 297
},
{
"epoch": 0.23390894819466249,
"grad_norm": 0.18863645195960999,
"learning_rate": 7.668759811616954e-06,
"loss": 1.1318,
"step": 298
},
{
"epoch": 0.23469387755102042,
"grad_norm": 0.2066003382205963,
"learning_rate": 7.660910518053376e-06,
"loss": 1.1542,
"step": 299
},
{
"epoch": 0.23547880690737832,
"grad_norm": 0.21004410088062286,
"learning_rate": 7.653061224489796e-06,
"loss": 1.1647,
"step": 300
},
{
"epoch": 0.23626373626373626,
"grad_norm": 0.19493690133094788,
"learning_rate": 7.645211930926217e-06,
"loss": 1.1743,
"step": 301
},
{
"epoch": 0.2370486656200942,
"grad_norm": 0.1926044225692749,
"learning_rate": 7.637362637362638e-06,
"loss": 1.1333,
"step": 302
},
{
"epoch": 0.23783359497645212,
"grad_norm": 0.19471141695976257,
"learning_rate": 7.629513343799058e-06,
"loss": 1.0989,
"step": 303
},
{
"epoch": 0.23861852433281006,
"grad_norm": 0.1914125680923462,
"learning_rate": 7.62166405023548e-06,
"loss": 1.1427,
"step": 304
},
{
"epoch": 0.239403453689168,
"grad_norm": 0.21529747545719147,
"learning_rate": 7.6138147566719e-06,
"loss": 1.117,
"step": 305
},
{
"epoch": 0.2401883830455259,
"grad_norm": 0.2219187617301941,
"learning_rate": 7.605965463108321e-06,
"loss": 1.1584,
"step": 306
},
{
"epoch": 0.24097331240188383,
"grad_norm": 0.20323887467384338,
"learning_rate": 7.598116169544741e-06,
"loss": 1.1543,
"step": 307
},
{
"epoch": 0.24175824175824176,
"grad_norm": 0.19014237821102142,
"learning_rate": 7.5902668759811625e-06,
"loss": 1.1149,
"step": 308
},
{
"epoch": 0.2425431711145997,
"grad_norm": 0.2022504359483719,
"learning_rate": 7.582417582417583e-06,
"loss": 1.1835,
"step": 309
},
{
"epoch": 0.24332810047095763,
"grad_norm": 0.19809505343437195,
"learning_rate": 7.574568288854004e-06,
"loss": 1.0916,
"step": 310
},
{
"epoch": 0.24411302982731553,
"grad_norm": 0.19800910353660583,
"learning_rate": 7.566718995290424e-06,
"loss": 1.1467,
"step": 311
},
{
"epoch": 0.24489795918367346,
"grad_norm": 0.19982385635375977,
"learning_rate": 7.558869701726846e-06,
"loss": 1.1186,
"step": 312
},
{
"epoch": 0.2456828885400314,
"grad_norm": 0.19915273785591125,
"learning_rate": 7.551020408163265e-06,
"loss": 1.1743,
"step": 313
},
{
"epoch": 0.24646781789638933,
"grad_norm": 0.21240346133708954,
"learning_rate": 7.543171114599687e-06,
"loss": 1.1575,
"step": 314
},
{
"epoch": 0.24725274725274726,
"grad_norm": 0.19470131397247314,
"learning_rate": 7.535321821036108e-06,
"loss": 1.1745,
"step": 315
},
{
"epoch": 0.24803767660910517,
"grad_norm": 0.21574276685714722,
"learning_rate": 7.527472527472528e-06,
"loss": 1.1922,
"step": 316
},
{
"epoch": 0.2488226059654631,
"grad_norm": 0.1990724802017212,
"learning_rate": 7.519623233908949e-06,
"loss": 1.1765,
"step": 317
},
{
"epoch": 0.24960753532182103,
"grad_norm": 0.1972692310810089,
"learning_rate": 7.511773940345369e-06,
"loss": 1.1848,
"step": 318
},
{
"epoch": 0.25039246467817894,
"grad_norm": 0.21943879127502441,
"learning_rate": 7.50392464678179e-06,
"loss": 1.1103,
"step": 319
},
{
"epoch": 0.25117739403453687,
"grad_norm": 0.20259033143520355,
"learning_rate": 7.496075353218211e-06,
"loss": 1.1587,
"step": 320
},
{
"epoch": 0.2519623233908948,
"grad_norm": 0.20974001288414001,
"learning_rate": 7.488226059654632e-06,
"loss": 1.1161,
"step": 321
},
{
"epoch": 0.25274725274725274,
"grad_norm": 0.1978650540113449,
"learning_rate": 7.480376766091052e-06,
"loss": 1.1743,
"step": 322
},
{
"epoch": 0.25353218210361067,
"grad_norm": 0.21677835285663605,
"learning_rate": 7.472527472527473e-06,
"loss": 1.1915,
"step": 323
},
{
"epoch": 0.2543171114599686,
"grad_norm": 0.19082266092300415,
"learning_rate": 7.464678178963893e-06,
"loss": 1.1146,
"step": 324
},
{
"epoch": 0.25510204081632654,
"grad_norm": 0.20703904330730438,
"learning_rate": 7.4568288854003145e-06,
"loss": 1.1325,
"step": 325
},
{
"epoch": 0.25588697017268447,
"grad_norm": 0.20791569352149963,
"learning_rate": 7.448979591836736e-06,
"loss": 1.1569,
"step": 326
},
{
"epoch": 0.2566718995290424,
"grad_norm": 0.2217319905757904,
"learning_rate": 7.441130298273156e-06,
"loss": 1.1823,
"step": 327
},
{
"epoch": 0.25745682888540034,
"grad_norm": 0.20722460746765137,
"learning_rate": 7.433281004709577e-06,
"loss": 1.1482,
"step": 328
},
{
"epoch": 0.25824175824175827,
"grad_norm": 0.2060956358909607,
"learning_rate": 7.425431711145998e-06,
"loss": 1.1829,
"step": 329
},
{
"epoch": 0.25902668759811615,
"grad_norm": 0.21304012835025787,
"learning_rate": 7.417582417582418e-06,
"loss": 1.1984,
"step": 330
},
{
"epoch": 0.2598116169544741,
"grad_norm": 0.21216687560081482,
"learning_rate": 7.409733124018839e-06,
"loss": 1.1238,
"step": 331
},
{
"epoch": 0.260596546310832,
"grad_norm": 0.2066863626241684,
"learning_rate": 7.40188383045526e-06,
"loss": 1.1304,
"step": 332
},
{
"epoch": 0.26138147566718994,
"grad_norm": 0.2035655975341797,
"learning_rate": 7.39403453689168e-06,
"loss": 1.1423,
"step": 333
},
{
"epoch": 0.2621664050235479,
"grad_norm": 0.2093246579170227,
"learning_rate": 7.386185243328101e-06,
"loss": 1.185,
"step": 334
},
{
"epoch": 0.2629513343799058,
"grad_norm": 0.20066991448402405,
"learning_rate": 7.378335949764521e-06,
"loss": 1.0917,
"step": 335
},
{
"epoch": 0.26373626373626374,
"grad_norm": 0.21041403710842133,
"learning_rate": 7.370486656200942e-06,
"loss": 1.1487,
"step": 336
},
{
"epoch": 0.2645211930926217,
"grad_norm": 0.19787679612636566,
"learning_rate": 7.362637362637364e-06,
"loss": 1.1429,
"step": 337
},
{
"epoch": 0.2653061224489796,
"grad_norm": 0.2156287282705307,
"learning_rate": 7.3547880690737835e-06,
"loss": 1.1162,
"step": 338
},
{
"epoch": 0.26609105180533754,
"grad_norm": 0.23158158361911774,
"learning_rate": 7.346938775510205e-06,
"loss": 1.1608,
"step": 339
},
{
"epoch": 0.2668759811616955,
"grad_norm": 0.21346524357795715,
"learning_rate": 7.339089481946625e-06,
"loss": 1.1092,
"step": 340
},
{
"epoch": 0.26766091051805335,
"grad_norm": 0.2354096919298172,
"learning_rate": 7.331240188383047e-06,
"loss": 1.1448,
"step": 341
},
{
"epoch": 0.2684458398744113,
"grad_norm": 0.20579902827739716,
"learning_rate": 7.3233908948194665e-06,
"loss": 1.1627,
"step": 342
},
{
"epoch": 0.2692307692307692,
"grad_norm": 0.24660111963748932,
"learning_rate": 7.315541601255888e-06,
"loss": 1.1113,
"step": 343
},
{
"epoch": 0.27001569858712715,
"grad_norm": 0.21383073925971985,
"learning_rate": 7.307692307692308e-06,
"loss": 1.1361,
"step": 344
},
{
"epoch": 0.2708006279434851,
"grad_norm": 0.2156330794095993,
"learning_rate": 7.299843014128729e-06,
"loss": 1.0608,
"step": 345
},
{
"epoch": 0.271585557299843,
"grad_norm": 0.20856386423110962,
"learning_rate": 7.29199372056515e-06,
"loss": 1.1545,
"step": 346
},
{
"epoch": 0.27237048665620095,
"grad_norm": 0.21835865080356598,
"learning_rate": 7.28414442700157e-06,
"loss": 1.1998,
"step": 347
},
{
"epoch": 0.2731554160125589,
"grad_norm": 0.23513004183769226,
"learning_rate": 7.2762951334379916e-06,
"loss": 1.2048,
"step": 348
},
{
"epoch": 0.2739403453689168,
"grad_norm": 0.22506913542747498,
"learning_rate": 7.268445839874412e-06,
"loss": 1.1828,
"step": 349
},
{
"epoch": 0.27472527472527475,
"grad_norm": 0.216568723320961,
"learning_rate": 7.260596546310833e-06,
"loss": 1.1928,
"step": 350
},
{
"epoch": 0.2755102040816326,
"grad_norm": 0.2345465123653412,
"learning_rate": 7.252747252747253e-06,
"loss": 1.1723,
"step": 351
},
{
"epoch": 0.27629513343799056,
"grad_norm": 0.21116310358047485,
"learning_rate": 7.244897959183675e-06,
"loss": 1.1362,
"step": 352
},
{
"epoch": 0.2770800627943485,
"grad_norm": 0.20539937913417816,
"learning_rate": 7.237048665620094e-06,
"loss": 1.1692,
"step": 353
},
{
"epoch": 0.2778649921507064,
"grad_norm": 0.22223587334156036,
"learning_rate": 7.229199372056516e-06,
"loss": 1.1589,
"step": 354
},
{
"epoch": 0.27864992150706436,
"grad_norm": 0.20060519874095917,
"learning_rate": 7.2213500784929355e-06,
"loss": 1.1264,
"step": 355
},
{
"epoch": 0.2794348508634223,
"grad_norm": 0.21847526729106903,
"learning_rate": 7.213500784929357e-06,
"loss": 1.1401,
"step": 356
},
{
"epoch": 0.2802197802197802,
"grad_norm": 0.22963590919971466,
"learning_rate": 7.205651491365777e-06,
"loss": 1.1523,
"step": 357
},
{
"epoch": 0.28100470957613816,
"grad_norm": 0.21631349623203278,
"learning_rate": 7.197802197802198e-06,
"loss": 1.1265,
"step": 358
},
{
"epoch": 0.2817896389324961,
"grad_norm": 0.21508990228176117,
"learning_rate": 7.189952904238619e-06,
"loss": 1.1125,
"step": 359
},
{
"epoch": 0.282574568288854,
"grad_norm": 0.2024298906326294,
"learning_rate": 7.18210361067504e-06,
"loss": 1.0864,
"step": 360
},
{
"epoch": 0.28335949764521196,
"grad_norm": 0.21588054299354553,
"learning_rate": 7.174254317111461e-06,
"loss": 1.1265,
"step": 361
},
{
"epoch": 0.28414442700156983,
"grad_norm": 0.3438448905944824,
"learning_rate": 7.166405023547881e-06,
"loss": 1.1087,
"step": 362
},
{
"epoch": 0.28492935635792777,
"grad_norm": 0.23037710785865784,
"learning_rate": 7.1585557299843024e-06,
"loss": 1.1683,
"step": 363
},
{
"epoch": 0.2857142857142857,
"grad_norm": 0.371964693069458,
"learning_rate": 7.150706436420722e-06,
"loss": 1.1306,
"step": 364
},
{
"epoch": 0.28649921507064363,
"grad_norm": 0.2277277410030365,
"learning_rate": 7.1428571428571436e-06,
"loss": 1.1758,
"step": 365
},
{
"epoch": 0.28728414442700156,
"grad_norm": 0.20543402433395386,
"learning_rate": 7.135007849293564e-06,
"loss": 1.1501,
"step": 366
},
{
"epoch": 0.2880690737833595,
"grad_norm": 0.2121914029121399,
"learning_rate": 7.127158555729985e-06,
"loss": 1.1247,
"step": 367
},
{
"epoch": 0.28885400313971743,
"grad_norm": 0.22225429117679596,
"learning_rate": 7.119309262166405e-06,
"loss": 1.1231,
"step": 368
},
{
"epoch": 0.28963893249607536,
"grad_norm": 0.21142037212848663,
"learning_rate": 7.111459968602827e-06,
"loss": 1.1528,
"step": 369
},
{
"epoch": 0.2904238618524333,
"grad_norm": 0.2387160062789917,
"learning_rate": 7.103610675039247e-06,
"loss": 1.1037,
"step": 370
},
{
"epoch": 0.29120879120879123,
"grad_norm": 0.2219192534685135,
"learning_rate": 7.095761381475668e-06,
"loss": 1.1385,
"step": 371
},
{
"epoch": 0.29199372056514916,
"grad_norm": 0.21481142938137054,
"learning_rate": 7.087912087912089e-06,
"loss": 1.1199,
"step": 372
},
{
"epoch": 0.29277864992150704,
"grad_norm": 0.2126331329345703,
"learning_rate": 7.080062794348509e-06,
"loss": 1.1346,
"step": 373
},
{
"epoch": 0.29356357927786497,
"grad_norm": 0.23164328932762146,
"learning_rate": 7.07221350078493e-06,
"loss": 1.1783,
"step": 374
},
{
"epoch": 0.2943485086342229,
"grad_norm": 0.23608547449111938,
"learning_rate": 7.06436420722135e-06,
"loss": 1.1549,
"step": 375
},
{
"epoch": 0.29513343799058084,
"grad_norm": 0.2102809101343155,
"learning_rate": 7.056514913657771e-06,
"loss": 1.144,
"step": 376
},
{
"epoch": 0.29591836734693877,
"grad_norm": 0.21914629638195038,
"learning_rate": 7.048665620094192e-06,
"loss": 1.1649,
"step": 377
},
{
"epoch": 0.2967032967032967,
"grad_norm": 0.22110004723072052,
"learning_rate": 7.0408163265306125e-06,
"loss": 1.1096,
"step": 378
},
{
"epoch": 0.29748822605965464,
"grad_norm": 0.23272880911827087,
"learning_rate": 7.032967032967034e-06,
"loss": 1.17,
"step": 379
},
{
"epoch": 0.29827315541601257,
"grad_norm": 0.2373398244380951,
"learning_rate": 7.0251177394034545e-06,
"loss": 1.1734,
"step": 380
},
{
"epoch": 0.2990580847723705,
"grad_norm": 0.20945794880390167,
"learning_rate": 7.017268445839875e-06,
"loss": 1.1192,
"step": 381
},
{
"epoch": 0.29984301412872844,
"grad_norm": 0.22829462587833405,
"learning_rate": 7.0094191522762956e-06,
"loss": 1.0966,
"step": 382
},
{
"epoch": 0.30062794348508637,
"grad_norm": 0.28657934069633484,
"learning_rate": 7.001569858712717e-06,
"loss": 1.0827,
"step": 383
},
{
"epoch": 0.30141287284144425,
"grad_norm": 0.2213151603937149,
"learning_rate": 6.993720565149137e-06,
"loss": 1.1452,
"step": 384
},
{
"epoch": 0.3021978021978022,
"grad_norm": 0.2172708660364151,
"learning_rate": 6.985871271585558e-06,
"loss": 1.1301,
"step": 385
},
{
"epoch": 0.3029827315541601,
"grad_norm": 0.21562695503234863,
"learning_rate": 6.978021978021979e-06,
"loss": 1.0969,
"step": 386
},
{
"epoch": 0.30376766091051804,
"grad_norm": 0.22482970356941223,
"learning_rate": 6.970172684458399e-06,
"loss": 1.0412,
"step": 387
},
{
"epoch": 0.304552590266876,
"grad_norm": 0.22429226338863373,
"learning_rate": 6.96232339089482e-06,
"loss": 1.1175,
"step": 388
},
{
"epoch": 0.3053375196232339,
"grad_norm": 0.306858628988266,
"learning_rate": 6.954474097331241e-06,
"loss": 1.1633,
"step": 389
},
{
"epoch": 0.30612244897959184,
"grad_norm": 0.22035177052021027,
"learning_rate": 6.946624803767662e-06,
"loss": 1.105,
"step": 390
},
{
"epoch": 0.3069073783359498,
"grad_norm": 0.20838768780231476,
"learning_rate": 6.938775510204082e-06,
"loss": 1.1126,
"step": 391
},
{
"epoch": 0.3076923076923077,
"grad_norm": 0.24387304484844208,
"learning_rate": 6.930926216640504e-06,
"loss": 1.0718,
"step": 392
},
{
"epoch": 0.30847723704866564,
"grad_norm": 0.20998525619506836,
"learning_rate": 6.923076923076923e-06,
"loss": 1.1408,
"step": 393
},
{
"epoch": 0.3092621664050236,
"grad_norm": 0.21903569996356964,
"learning_rate": 6.915227629513345e-06,
"loss": 1.1225,
"step": 394
},
{
"epoch": 0.31004709576138145,
"grad_norm": 0.21235527098178864,
"learning_rate": 6.9073783359497645e-06,
"loss": 1.0895,
"step": 395
},
{
"epoch": 0.3108320251177394,
"grad_norm": 0.24162974953651428,
"learning_rate": 6.899529042386186e-06,
"loss": 1.1487,
"step": 396
},
{
"epoch": 0.3116169544740973,
"grad_norm": 0.22563737630844116,
"learning_rate": 6.8916797488226065e-06,
"loss": 1.1442,
"step": 397
},
{
"epoch": 0.31240188383045525,
"grad_norm": 0.2697785794734955,
"learning_rate": 6.883830455259027e-06,
"loss": 1.2054,
"step": 398
},
{
"epoch": 0.3131868131868132,
"grad_norm": 0.20973092317581177,
"learning_rate": 6.8759811616954476e-06,
"loss": 1.1354,
"step": 399
},
{
"epoch": 0.3139717425431711,
"grad_norm": 0.21411223709583282,
"learning_rate": 6.868131868131869e-06,
"loss": 1.1232,
"step": 400
},
{
"epoch": 0.31475667189952905,
"grad_norm": 0.2229021191596985,
"learning_rate": 6.8602825745682895e-06,
"loss": 1.0909,
"step": 401
},
{
"epoch": 0.315541601255887,
"grad_norm": 0.25998759269714355,
"learning_rate": 6.85243328100471e-06,
"loss": 1.1639,
"step": 402
},
{
"epoch": 0.3163265306122449,
"grad_norm": 0.2209548056125641,
"learning_rate": 6.8445839874411315e-06,
"loss": 1.122,
"step": 403
},
{
"epoch": 0.31711145996860285,
"grad_norm": 0.2104836255311966,
"learning_rate": 6.836734693877551e-06,
"loss": 1.1052,
"step": 404
},
{
"epoch": 0.3178963893249607,
"grad_norm": 0.22340314090251923,
"learning_rate": 6.828885400313973e-06,
"loss": 1.1621,
"step": 405
},
{
"epoch": 0.31868131868131866,
"grad_norm": 0.20940294861793518,
"learning_rate": 6.821036106750393e-06,
"loss": 1.0895,
"step": 406
},
{
"epoch": 0.3194662480376766,
"grad_norm": 0.23529349267482758,
"learning_rate": 6.813186813186814e-06,
"loss": 1.1796,
"step": 407
},
{
"epoch": 0.3202511773940345,
"grad_norm": 0.22994717955589294,
"learning_rate": 6.805337519623234e-06,
"loss": 1.1736,
"step": 408
},
{
"epoch": 0.32103610675039246,
"grad_norm": 0.21799279749393463,
"learning_rate": 6.797488226059656e-06,
"loss": 1.0804,
"step": 409
},
{
"epoch": 0.3218210361067504,
"grad_norm": 0.22495627403259277,
"learning_rate": 6.789638932496075e-06,
"loss": 1.1605,
"step": 410
},
{
"epoch": 0.3226059654631083,
"grad_norm": 0.236924409866333,
"learning_rate": 6.781789638932497e-06,
"loss": 1.1081,
"step": 411
},
{
"epoch": 0.32339089481946626,
"grad_norm": 0.22784234583377838,
"learning_rate": 6.773940345368918e-06,
"loss": 1.092,
"step": 412
},
{
"epoch": 0.3241758241758242,
"grad_norm": 0.22424954175949097,
"learning_rate": 6.766091051805338e-06,
"loss": 1.1431,
"step": 413
},
{
"epoch": 0.3249607535321821,
"grad_norm": 0.22108778357505798,
"learning_rate": 6.758241758241759e-06,
"loss": 1.0865,
"step": 414
},
{
"epoch": 0.32574568288854006,
"grad_norm": 0.22631670534610748,
"learning_rate": 6.750392464678179e-06,
"loss": 1.1481,
"step": 415
},
{
"epoch": 0.32653061224489793,
"grad_norm": 0.23880915343761444,
"learning_rate": 6.7425431711146e-06,
"loss": 1.1503,
"step": 416
},
{
"epoch": 0.32731554160125587,
"grad_norm": 0.21916764974594116,
"learning_rate": 6.734693877551021e-06,
"loss": 1.1001,
"step": 417
},
{
"epoch": 0.3281004709576138,
"grad_norm": 0.22851671278476715,
"learning_rate": 6.7268445839874415e-06,
"loss": 1.0496,
"step": 418
},
{
"epoch": 0.32888540031397173,
"grad_norm": 0.2205754965543747,
"learning_rate": 6.718995290423862e-06,
"loss": 1.1324,
"step": 419
},
{
"epoch": 0.32967032967032966,
"grad_norm": 0.2369145303964615,
"learning_rate": 6.7111459968602835e-06,
"loss": 1.1575,
"step": 420
},
{
"epoch": 0.3304552590266876,
"grad_norm": 0.21567904949188232,
"learning_rate": 6.703296703296703e-06,
"loss": 1.1247,
"step": 421
},
{
"epoch": 0.33124018838304553,
"grad_norm": 0.22381411492824554,
"learning_rate": 6.695447409733125e-06,
"loss": 1.0777,
"step": 422
},
{
"epoch": 0.33202511773940346,
"grad_norm": 0.21957136690616608,
"learning_rate": 6.687598116169546e-06,
"loss": 1.0945,
"step": 423
},
{
"epoch": 0.3328100470957614,
"grad_norm": 0.22392071783542633,
"learning_rate": 6.679748822605966e-06,
"loss": 1.084,
"step": 424
},
{
"epoch": 0.33359497645211933,
"grad_norm": 0.21616291999816895,
"learning_rate": 6.671899529042387e-06,
"loss": 1.0848,
"step": 425
},
{
"epoch": 0.33437990580847726,
"grad_norm": 0.2230328768491745,
"learning_rate": 6.664050235478807e-06,
"loss": 1.1058,
"step": 426
},
{
"epoch": 0.33516483516483514,
"grad_norm": 0.21062208712100983,
"learning_rate": 6.656200941915228e-06,
"loss": 1.0867,
"step": 427
},
{
"epoch": 0.3359497645211931,
"grad_norm": 0.22089996933937073,
"learning_rate": 6.648351648351649e-06,
"loss": 1.1032,
"step": 428
},
{
"epoch": 0.336734693877551,
"grad_norm": 0.2328905314207077,
"learning_rate": 6.64050235478807e-06,
"loss": 1.1551,
"step": 429
},
{
"epoch": 0.33751962323390894,
"grad_norm": 0.23525848984718323,
"learning_rate": 6.63265306122449e-06,
"loss": 1.1529,
"step": 430
},
{
"epoch": 0.33830455259026687,
"grad_norm": 0.2343936562538147,
"learning_rate": 6.624803767660911e-06,
"loss": 1.1262,
"step": 431
},
{
"epoch": 0.3390894819466248,
"grad_norm": 0.2223115712404251,
"learning_rate": 6.616954474097331e-06,
"loss": 1.0855,
"step": 432
},
{
"epoch": 0.33987441130298274,
"grad_norm": 0.24484506249427795,
"learning_rate": 6.609105180533752e-06,
"loss": 1.0972,
"step": 433
},
{
"epoch": 0.34065934065934067,
"grad_norm": 0.22786875069141388,
"learning_rate": 6.601255886970174e-06,
"loss": 1.0784,
"step": 434
},
{
"epoch": 0.3414442700156986,
"grad_norm": 0.23298341035842896,
"learning_rate": 6.5934065934065935e-06,
"loss": 1.1613,
"step": 435
},
{
"epoch": 0.34222919937205654,
"grad_norm": 0.2376134991645813,
"learning_rate": 6.585557299843015e-06,
"loss": 1.0892,
"step": 436
},
{
"epoch": 0.34301412872841447,
"grad_norm": 0.23039846122264862,
"learning_rate": 6.5777080062794355e-06,
"loss": 1.1073,
"step": 437
},
{
"epoch": 0.34379905808477235,
"grad_norm": 0.23520535230636597,
"learning_rate": 6.569858712715856e-06,
"loss": 1.0612,
"step": 438
},
{
"epoch": 0.3445839874411303,
"grad_norm": 0.26625117659568787,
"learning_rate": 6.562009419152277e-06,
"loss": 1.0821,
"step": 439
},
{
"epoch": 0.3453689167974882,
"grad_norm": 0.22589145600795746,
"learning_rate": 6.554160125588698e-06,
"loss": 1.0766,
"step": 440
},
{
"epoch": 0.34615384615384615,
"grad_norm": 0.23303988575935364,
"learning_rate": 6.546310832025118e-06,
"loss": 1.1523,
"step": 441
},
{
"epoch": 0.3469387755102041,
"grad_norm": 0.24992690980434418,
"learning_rate": 6.538461538461539e-06,
"loss": 1.1378,
"step": 442
},
{
"epoch": 0.347723704866562,
"grad_norm": 0.23018966615200043,
"learning_rate": 6.530612244897959e-06,
"loss": 1.1121,
"step": 443
},
{
"epoch": 0.34850863422291994,
"grad_norm": 0.23048517107963562,
"learning_rate": 6.52276295133438e-06,
"loss": 1.1031,
"step": 444
},
{
"epoch": 0.3492935635792779,
"grad_norm": 0.239332914352417,
"learning_rate": 6.514913657770802e-06,
"loss": 1.1592,
"step": 445
},
{
"epoch": 0.3500784929356358,
"grad_norm": 0.236043319106102,
"learning_rate": 6.507064364207221e-06,
"loss": 1.082,
"step": 446
},
{
"epoch": 0.35086342229199374,
"grad_norm": 0.22672872245311737,
"learning_rate": 6.499215070643643e-06,
"loss": 1.1188,
"step": 447
},
{
"epoch": 0.3516483516483517,
"grad_norm": 0.23188328742980957,
"learning_rate": 6.491365777080063e-06,
"loss": 1.1368,
"step": 448
},
{
"epoch": 0.35243328100470955,
"grad_norm": 0.23719431459903717,
"learning_rate": 6.483516483516485e-06,
"loss": 1.1847,
"step": 449
},
{
"epoch": 0.3532182103610675,
"grad_norm": 0.23115630447864532,
"learning_rate": 6.4756671899529044e-06,
"loss": 1.1583,
"step": 450
},
{
"epoch": 0.3540031397174254,
"grad_norm": 0.22967442870140076,
"learning_rate": 6.467817896389326e-06,
"loss": 1.1319,
"step": 451
},
{
"epoch": 0.35478806907378335,
"grad_norm": 0.22553735971450806,
"learning_rate": 6.4599686028257455e-06,
"loss": 1.0739,
"step": 452
},
{
"epoch": 0.3555729984301413,
"grad_norm": 0.2251790314912796,
"learning_rate": 6.452119309262167e-06,
"loss": 1.1276,
"step": 453
},
{
"epoch": 0.3563579277864992,
"grad_norm": 0.22052745521068573,
"learning_rate": 6.4442700156985875e-06,
"loss": 1.1079,
"step": 454
},
{
"epoch": 0.35714285714285715,
"grad_norm": 0.2503249943256378,
"learning_rate": 6.436420722135008e-06,
"loss": 1.1554,
"step": 455
},
{
"epoch": 0.3579277864992151,
"grad_norm": 0.23954695463180542,
"learning_rate": 6.4285714285714295e-06,
"loss": 1.1269,
"step": 456
},
{
"epoch": 0.358712715855573,
"grad_norm": 0.2597452700138092,
"learning_rate": 6.42072213500785e-06,
"loss": 1.0967,
"step": 457
},
{
"epoch": 0.35949764521193095,
"grad_norm": 0.2288360893726349,
"learning_rate": 6.4128728414442706e-06,
"loss": 1.1297,
"step": 458
},
{
"epoch": 0.3602825745682888,
"grad_norm": 0.23878303170204163,
"learning_rate": 6.405023547880691e-06,
"loss": 1.1208,
"step": 459
},
{
"epoch": 0.36106750392464676,
"grad_norm": 0.22452662885189056,
"learning_rate": 6.3971742543171125e-06,
"loss": 1.0891,
"step": 460
},
{
"epoch": 0.3618524332810047,
"grad_norm": 0.23185127973556519,
"learning_rate": 6.389324960753532e-06,
"loss": 1.058,
"step": 461
},
{
"epoch": 0.3626373626373626,
"grad_norm": 0.23189175128936768,
"learning_rate": 6.381475667189954e-06,
"loss": 1.0812,
"step": 462
},
{
"epoch": 0.36342229199372056,
"grad_norm": 0.23450538516044617,
"learning_rate": 6.373626373626373e-06,
"loss": 1.1089,
"step": 463
},
{
"epoch": 0.3642072213500785,
"grad_norm": 0.22508233785629272,
"learning_rate": 6.365777080062795e-06,
"loss": 1.1062,
"step": 464
},
{
"epoch": 0.3649921507064364,
"grad_norm": 0.24207262694835663,
"learning_rate": 6.357927786499215e-06,
"loss": 1.1396,
"step": 465
},
{
"epoch": 0.36577708006279436,
"grad_norm": 0.2606029510498047,
"learning_rate": 6.350078492935636e-06,
"loss": 1.1261,
"step": 466
},
{
"epoch": 0.3665620094191523,
"grad_norm": 0.22962482273578644,
"learning_rate": 6.342229199372057e-06,
"loss": 1.1354,
"step": 467
},
{
"epoch": 0.3673469387755102,
"grad_norm": 0.3107023239135742,
"learning_rate": 6.334379905808478e-06,
"loss": 1.1735,
"step": 468
},
{
"epoch": 0.36813186813186816,
"grad_norm": 0.23643136024475098,
"learning_rate": 6.326530612244899e-06,
"loss": 1.086,
"step": 469
},
{
"epoch": 0.36891679748822603,
"grad_norm": 0.240147203207016,
"learning_rate": 6.318681318681319e-06,
"loss": 1.1356,
"step": 470
},
{
"epoch": 0.36970172684458397,
"grad_norm": 0.2438742220401764,
"learning_rate": 6.31083202511774e-06,
"loss": 1.1539,
"step": 471
},
{
"epoch": 0.3704866562009419,
"grad_norm": 0.2621842920780182,
"learning_rate": 6.30298273155416e-06,
"loss": 1.1623,
"step": 472
},
{
"epoch": 0.37127158555729983,
"grad_norm": 0.246039479970932,
"learning_rate": 6.2951334379905815e-06,
"loss": 1.1418,
"step": 473
},
{
"epoch": 0.37205651491365777,
"grad_norm": 0.2392723560333252,
"learning_rate": 6.287284144427002e-06,
"loss": 1.1692,
"step": 474
},
{
"epoch": 0.3728414442700157,
"grad_norm": 0.23897096514701843,
"learning_rate": 6.279434850863423e-06,
"loss": 1.111,
"step": 475
},
{
"epoch": 0.37362637362637363,
"grad_norm": 0.24796399474143982,
"learning_rate": 6.271585557299843e-06,
"loss": 1.0847,
"step": 476
},
{
"epoch": 0.37441130298273156,
"grad_norm": 0.33171799778938293,
"learning_rate": 6.2637362637362645e-06,
"loss": 1.226,
"step": 477
},
{
"epoch": 0.3751962323390895,
"grad_norm": 0.2655907869338989,
"learning_rate": 6.255886970172685e-06,
"loss": 1.0548,
"step": 478
},
{
"epoch": 0.37598116169544743,
"grad_norm": 0.2475586235523224,
"learning_rate": 6.248037676609106e-06,
"loss": 1.0914,
"step": 479
},
{
"epoch": 0.37676609105180536,
"grad_norm": 0.2385740876197815,
"learning_rate": 6.240188383045527e-06,
"loss": 1.0984,
"step": 480
},
{
"epoch": 0.37755102040816324,
"grad_norm": 0.27485817670822144,
"learning_rate": 6.232339089481947e-06,
"loss": 1.1461,
"step": 481
},
{
"epoch": 0.3783359497645212,
"grad_norm": 0.23348768055438995,
"learning_rate": 6.224489795918368e-06,
"loss": 1.0617,
"step": 482
},
{
"epoch": 0.3791208791208791,
"grad_norm": 0.23941084742546082,
"learning_rate": 6.216640502354788e-06,
"loss": 1.104,
"step": 483
},
{
"epoch": 0.37990580847723704,
"grad_norm": 0.22513240575790405,
"learning_rate": 6.208791208791209e-06,
"loss": 1.0354,
"step": 484
},
{
"epoch": 0.38069073783359497,
"grad_norm": 0.2495739459991455,
"learning_rate": 6.20094191522763e-06,
"loss": 1.1075,
"step": 485
},
{
"epoch": 0.3814756671899529,
"grad_norm": 0.23454253375530243,
"learning_rate": 6.19309262166405e-06,
"loss": 1.07,
"step": 486
},
{
"epoch": 0.38226059654631084,
"grad_norm": 0.2577785849571228,
"learning_rate": 6.185243328100472e-06,
"loss": 1.1229,
"step": 487
},
{
"epoch": 0.38304552590266877,
"grad_norm": 0.24680796265602112,
"learning_rate": 6.177394034536892e-06,
"loss": 1.1011,
"step": 488
},
{
"epoch": 0.3838304552590267,
"grad_norm": 0.23051689565181732,
"learning_rate": 6.169544740973314e-06,
"loss": 1.0953,
"step": 489
},
{
"epoch": 0.38461538461538464,
"grad_norm": 0.25582489371299744,
"learning_rate": 6.1616954474097335e-06,
"loss": 1.0825,
"step": 490
},
{
"epoch": 0.38540031397174257,
"grad_norm": 0.2379298061132431,
"learning_rate": 6.153846153846155e-06,
"loss": 1.0859,
"step": 491
},
{
"epoch": 0.38618524332810045,
"grad_norm": 0.2519659996032715,
"learning_rate": 6.145996860282575e-06,
"loss": 1.1339,
"step": 492
},
{
"epoch": 0.3869701726844584,
"grad_norm": 0.23378105461597443,
"learning_rate": 6.138147566718996e-06,
"loss": 1.0865,
"step": 493
},
{
"epoch": 0.3877551020408163,
"grad_norm": 0.24967290461063385,
"learning_rate": 6.1302982731554165e-06,
"loss": 1.1257,
"step": 494
},
{
"epoch": 0.38854003139717425,
"grad_norm": 0.2489061802625656,
"learning_rate": 6.122448979591837e-06,
"loss": 1.108,
"step": 495
},
{
"epoch": 0.3893249607535322,
"grad_norm": 0.24166519939899445,
"learning_rate": 6.114599686028258e-06,
"loss": 1.1026,
"step": 496
},
{
"epoch": 0.3901098901098901,
"grad_norm": 0.2487422674894333,
"learning_rate": 6.106750392464679e-06,
"loss": 1.144,
"step": 497
},
{
"epoch": 0.39089481946624804,
"grad_norm": 0.2340540736913681,
"learning_rate": 6.0989010989011e-06,
"loss": 1.0835,
"step": 498
},
{
"epoch": 0.391679748822606,
"grad_norm": 0.24633920192718506,
"learning_rate": 6.09105180533752e-06,
"loss": 1.0741,
"step": 499
},
{
"epoch": 0.3924646781789639,
"grad_norm": 0.23868292570114136,
"learning_rate": 6.0832025117739416e-06,
"loss": 1.0815,
"step": 500
},
{
"epoch": 0.39324960753532184,
"grad_norm": 0.2488425076007843,
"learning_rate": 6.075353218210361e-06,
"loss": 1.09,
"step": 501
},
{
"epoch": 0.3940345368916798,
"grad_norm": 0.22781234979629517,
"learning_rate": 6.067503924646783e-06,
"loss": 1.0947,
"step": 502
},
{
"epoch": 0.39481946624803765,
"grad_norm": 0.24967414140701294,
"learning_rate": 6.059654631083202e-06,
"loss": 1.1331,
"step": 503
},
{
"epoch": 0.3956043956043956,
"grad_norm": 0.2647199332714081,
"learning_rate": 6.051805337519624e-06,
"loss": 1.1436,
"step": 504
},
{
"epoch": 0.3963893249607535,
"grad_norm": 0.24577093124389648,
"learning_rate": 6.043956043956044e-06,
"loss": 1.0649,
"step": 505
},
{
"epoch": 0.39717425431711145,
"grad_norm": 0.23209503293037415,
"learning_rate": 6.036106750392465e-06,
"loss": 1.0968,
"step": 506
},
{
"epoch": 0.3979591836734694,
"grad_norm": 0.23621460795402527,
"learning_rate": 6.0282574568288855e-06,
"loss": 1.0858,
"step": 507
},
{
"epoch": 0.3987441130298273,
"grad_norm": 0.23415528237819672,
"learning_rate": 6.020408163265307e-06,
"loss": 1.039,
"step": 508
},
{
"epoch": 0.39952904238618525,
"grad_norm": 0.24313485622406006,
"learning_rate": 6.012558869701728e-06,
"loss": 1.1233,
"step": 509
},
{
"epoch": 0.4003139717425432,
"grad_norm": 0.24086087942123413,
"learning_rate": 6.004709576138148e-06,
"loss": 1.0779,
"step": 510
},
{
"epoch": 0.4010989010989011,
"grad_norm": 0.22834287583827972,
"learning_rate": 5.996860282574569e-06,
"loss": 1.0448,
"step": 511
},
{
"epoch": 0.40188383045525905,
"grad_norm": 0.2525885999202728,
"learning_rate": 5.989010989010989e-06,
"loss": 1.0905,
"step": 512
},
{
"epoch": 0.4026687598116169,
"grad_norm": 0.26272568106651306,
"learning_rate": 5.9811616954474105e-06,
"loss": 1.114,
"step": 513
},
{
"epoch": 0.40345368916797486,
"grad_norm": 0.2448360174894333,
"learning_rate": 5.973312401883831e-06,
"loss": 1.0682,
"step": 514
},
{
"epoch": 0.4042386185243328,
"grad_norm": 0.24594442546367645,
"learning_rate": 5.965463108320252e-06,
"loss": 1.1067,
"step": 515
},
{
"epoch": 0.4050235478806907,
"grad_norm": 0.23227113485336304,
"learning_rate": 5.957613814756672e-06,
"loss": 1.0592,
"step": 516
},
{
"epoch": 0.40580847723704866,
"grad_norm": 0.24381886422634125,
"learning_rate": 5.949764521193094e-06,
"loss": 1.0932,
"step": 517
},
{
"epoch": 0.4065934065934066,
"grad_norm": 0.2468293309211731,
"learning_rate": 5.941915227629513e-06,
"loss": 1.0975,
"step": 518
},
{
"epoch": 0.4073783359497645,
"grad_norm": 0.24225494265556335,
"learning_rate": 5.934065934065935e-06,
"loss": 1.095,
"step": 519
},
{
"epoch": 0.40816326530612246,
"grad_norm": 0.2511250972747803,
"learning_rate": 5.926216640502356e-06,
"loss": 1.1268,
"step": 520
},
{
"epoch": 0.4089481946624804,
"grad_norm": 0.25592711567878723,
"learning_rate": 5.918367346938776e-06,
"loss": 1.0655,
"step": 521
},
{
"epoch": 0.4097331240188383,
"grad_norm": 0.2539741098880768,
"learning_rate": 5.910518053375197e-06,
"loss": 1.0909,
"step": 522
},
{
"epoch": 0.41051805337519626,
"grad_norm": 0.2586928904056549,
"learning_rate": 5.902668759811617e-06,
"loss": 1.102,
"step": 523
},
{
"epoch": 0.41130298273155413,
"grad_norm": 0.2530655860900879,
"learning_rate": 5.894819466248038e-06,
"loss": 1.1278,
"step": 524
},
{
"epoch": 0.41208791208791207,
"grad_norm": 0.2644376754760742,
"learning_rate": 5.886970172684459e-06,
"loss": 1.0853,
"step": 525
},
{
"epoch": 0.41287284144427,
"grad_norm": 0.2579070031642914,
"learning_rate": 5.8791208791208794e-06,
"loss": 1.0258,
"step": 526
},
{
"epoch": 0.41365777080062793,
"grad_norm": 0.2522946894168854,
"learning_rate": 5.8712715855573e-06,
"loss": 1.0873,
"step": 527
},
{
"epoch": 0.41444270015698587,
"grad_norm": 0.24585580825805664,
"learning_rate": 5.863422291993721e-06,
"loss": 1.0685,
"step": 528
},
{
"epoch": 0.4152276295133438,
"grad_norm": 0.23922014236450195,
"learning_rate": 5.855572998430141e-06,
"loss": 1.0501,
"step": 529
},
{
"epoch": 0.41601255886970173,
"grad_norm": 0.3307192325592041,
"learning_rate": 5.8477237048665625e-06,
"loss": 1.098,
"step": 530
},
{
"epoch": 0.41679748822605966,
"grad_norm": 0.26279258728027344,
"learning_rate": 5.839874411302984e-06,
"loss": 1.1451,
"step": 531
},
{
"epoch": 0.4175824175824176,
"grad_norm": 0.24915215373039246,
"learning_rate": 5.832025117739404e-06,
"loss": 1.0702,
"step": 532
},
{
"epoch": 0.41836734693877553,
"grad_norm": 0.3191397488117218,
"learning_rate": 5.824175824175825e-06,
"loss": 1.1355,
"step": 533
},
{
"epoch": 0.41915227629513346,
"grad_norm": 0.24395009875297546,
"learning_rate": 5.816326530612246e-06,
"loss": 1.0693,
"step": 534
},
{
"epoch": 0.41993720565149134,
"grad_norm": 0.2642136812210083,
"learning_rate": 5.808477237048666e-06,
"loss": 1.0774,
"step": 535
},
{
"epoch": 0.4207221350078493,
"grad_norm": 0.24453726410865784,
"learning_rate": 5.800627943485087e-06,
"loss": 1.0767,
"step": 536
},
{
"epoch": 0.4215070643642072,
"grad_norm": 0.381024569272995,
"learning_rate": 5.792778649921508e-06,
"loss": 1.1438,
"step": 537
},
{
"epoch": 0.42229199372056514,
"grad_norm": 0.30179327726364136,
"learning_rate": 5.784929356357928e-06,
"loss": 1.1929,
"step": 538
},
{
"epoch": 0.4230769230769231,
"grad_norm": 0.24929693341255188,
"learning_rate": 5.777080062794349e-06,
"loss": 1.1482,
"step": 539
},
{
"epoch": 0.423861852433281,
"grad_norm": 0.26368796825408936,
"learning_rate": 5.769230769230769e-06,
"loss": 1.0321,
"step": 540
},
{
"epoch": 0.42464678178963894,
"grad_norm": 0.2380351573228836,
"learning_rate": 5.76138147566719e-06,
"loss": 1.1042,
"step": 541
},
{
"epoch": 0.42543171114599687,
"grad_norm": 0.25883641839027405,
"learning_rate": 5.753532182103612e-06,
"loss": 1.0072,
"step": 542
},
{
"epoch": 0.4262166405023548,
"grad_norm": 0.23825643956661224,
"learning_rate": 5.7456828885400314e-06,
"loss": 1.0884,
"step": 543
},
{
"epoch": 0.42700156985871274,
"grad_norm": 0.3005066215991974,
"learning_rate": 5.737833594976453e-06,
"loss": 1.0975,
"step": 544
},
{
"epoch": 0.42778649921507067,
"grad_norm": 0.24875208735466003,
"learning_rate": 5.729984301412873e-06,
"loss": 1.1003,
"step": 545
},
{
"epoch": 0.42857142857142855,
"grad_norm": 0.350685715675354,
"learning_rate": 5.722135007849294e-06,
"loss": 1.0723,
"step": 546
},
{
"epoch": 0.4293563579277865,
"grad_norm": 0.2770320177078247,
"learning_rate": 5.7142857142857145e-06,
"loss": 1.1224,
"step": 547
},
{
"epoch": 0.4301412872841444,
"grad_norm": 0.24251538515090942,
"learning_rate": 5.706436420722136e-06,
"loss": 1.0548,
"step": 548
},
{
"epoch": 0.43092621664050235,
"grad_norm": 0.24872933328151703,
"learning_rate": 5.698587127158556e-06,
"loss": 1.0798,
"step": 549
},
{
"epoch": 0.4317111459968603,
"grad_norm": 0.2544524669647217,
"learning_rate": 5.690737833594977e-06,
"loss": 1.1251,
"step": 550
},
{
"epoch": 0.4324960753532182,
"grad_norm": 0.24090701341629028,
"learning_rate": 5.682888540031397e-06,
"loss": 1.1367,
"step": 551
},
{
"epoch": 0.43328100470957615,
"grad_norm": 0.23915883898735046,
"learning_rate": 5.675039246467818e-06,
"loss": 1.0647,
"step": 552
},
{
"epoch": 0.4340659340659341,
"grad_norm": 0.2448183000087738,
"learning_rate": 5.6671899529042395e-06,
"loss": 1.0755,
"step": 553
},
{
"epoch": 0.434850863422292,
"grad_norm": 0.25648248195648193,
"learning_rate": 5.65934065934066e-06,
"loss": 1.0525,
"step": 554
},
{
"epoch": 0.43563579277864994,
"grad_norm": 0.23780952394008636,
"learning_rate": 5.651491365777081e-06,
"loss": 1.0902,
"step": 555
},
{
"epoch": 0.4364207221350079,
"grad_norm": 0.2836390733718872,
"learning_rate": 5.643642072213501e-06,
"loss": 1.0601,
"step": 556
},
{
"epoch": 0.43720565149136575,
"grad_norm": 0.24821418523788452,
"learning_rate": 5.635792778649923e-06,
"loss": 1.083,
"step": 557
},
{
"epoch": 0.4379905808477237,
"grad_norm": 0.25154179334640503,
"learning_rate": 5.627943485086342e-06,
"loss": 1.1134,
"step": 558
},
{
"epoch": 0.4387755102040816,
"grad_norm": 0.26084184646606445,
"learning_rate": 5.620094191522764e-06,
"loss": 1.111,
"step": 559
},
{
"epoch": 0.43956043956043955,
"grad_norm": 0.2489292174577713,
"learning_rate": 5.6122448979591834e-06,
"loss": 1.1249,
"step": 560
},
{
"epoch": 0.4403453689167975,
"grad_norm": 0.2615135610103607,
"learning_rate": 5.604395604395605e-06,
"loss": 1.1283,
"step": 561
},
{
"epoch": 0.4411302982731554,
"grad_norm": 0.2750528156757355,
"learning_rate": 5.596546310832025e-06,
"loss": 1.0626,
"step": 562
},
{
"epoch": 0.44191522762951335,
"grad_norm": 0.2562703788280487,
"learning_rate": 5.588697017268446e-06,
"loss": 1.0732,
"step": 563
},
{
"epoch": 0.4427001569858713,
"grad_norm": 0.25479015707969666,
"learning_rate": 5.580847723704867e-06,
"loss": 1.0509,
"step": 564
},
{
"epoch": 0.4434850863422292,
"grad_norm": 0.30177152156829834,
"learning_rate": 5.572998430141288e-06,
"loss": 1.0856,
"step": 565
},
{
"epoch": 0.44427001569858715,
"grad_norm": 0.2459626942873001,
"learning_rate": 5.5651491365777085e-06,
"loss": 1.0828,
"step": 566
},
{
"epoch": 0.44505494505494503,
"grad_norm": 0.2764773964881897,
"learning_rate": 5.557299843014129e-06,
"loss": 1.1437,
"step": 567
},
{
"epoch": 0.44583987441130296,
"grad_norm": 0.24312351644039154,
"learning_rate": 5.5494505494505504e-06,
"loss": 1.0679,
"step": 568
},
{
"epoch": 0.4466248037676609,
"grad_norm": 0.5099506974220276,
"learning_rate": 5.54160125588697e-06,
"loss": 1.1154,
"step": 569
},
{
"epoch": 0.4474097331240188,
"grad_norm": 0.24318666756153107,
"learning_rate": 5.5337519623233915e-06,
"loss": 1.0725,
"step": 570
},
{
"epoch": 0.44819466248037676,
"grad_norm": 0.2769014835357666,
"learning_rate": 5.525902668759811e-06,
"loss": 1.1424,
"step": 571
},
{
"epoch": 0.4489795918367347,
"grad_norm": 0.2566874921321869,
"learning_rate": 5.518053375196233e-06,
"loss": 1.1231,
"step": 572
},
{
"epoch": 0.4497645211930926,
"grad_norm": 0.27724429965019226,
"learning_rate": 5.510204081632653e-06,
"loss": 1.105,
"step": 573
},
{
"epoch": 0.45054945054945056,
"grad_norm": 0.28734901547431946,
"learning_rate": 5.502354788069074e-06,
"loss": 1.1076,
"step": 574
},
{
"epoch": 0.4513343799058085,
"grad_norm": 0.25727584958076477,
"learning_rate": 5.494505494505495e-06,
"loss": 1.0624,
"step": 575
},
{
"epoch": 0.4521193092621664,
"grad_norm": 0.2726932764053345,
"learning_rate": 5.486656200941916e-06,
"loss": 1.0954,
"step": 576
},
{
"epoch": 0.45290423861852436,
"grad_norm": 0.249312624335289,
"learning_rate": 5.478806907378337e-06,
"loss": 1.1038,
"step": 577
},
{
"epoch": 0.45368916797488223,
"grad_norm": 0.2610965669155121,
"learning_rate": 5.470957613814757e-06,
"loss": 1.1033,
"step": 578
},
{
"epoch": 0.45447409733124017,
"grad_norm": 0.2687593400478363,
"learning_rate": 5.463108320251178e-06,
"loss": 1.1013,
"step": 579
},
{
"epoch": 0.4552590266875981,
"grad_norm": 0.2867527902126312,
"learning_rate": 5.455259026687598e-06,
"loss": 1.104,
"step": 580
},
{
"epoch": 0.45604395604395603,
"grad_norm": 0.2627805769443512,
"learning_rate": 5.447409733124019e-06,
"loss": 1.0958,
"step": 581
},
{
"epoch": 0.45682888540031397,
"grad_norm": 0.25707143545150757,
"learning_rate": 5.43956043956044e-06,
"loss": 1.1073,
"step": 582
},
{
"epoch": 0.4576138147566719,
"grad_norm": 0.26457729935646057,
"learning_rate": 5.4317111459968605e-06,
"loss": 1.1379,
"step": 583
},
{
"epoch": 0.45839874411302983,
"grad_norm": 0.25039640069007874,
"learning_rate": 5.423861852433281e-06,
"loss": 1.1107,
"step": 584
},
{
"epoch": 0.45918367346938777,
"grad_norm": 0.2630631923675537,
"learning_rate": 5.4160125588697024e-06,
"loss": 1.0336,
"step": 585
},
{
"epoch": 0.4599686028257457,
"grad_norm": 0.2637465298175812,
"learning_rate": 5.408163265306123e-06,
"loss": 1.1183,
"step": 586
},
{
"epoch": 0.46075353218210363,
"grad_norm": 0.2565993368625641,
"learning_rate": 5.4003139717425436e-06,
"loss": 1.0769,
"step": 587
},
{
"epoch": 0.46153846153846156,
"grad_norm": 0.2511787414550781,
"learning_rate": 5.392464678178965e-06,
"loss": 1.1005,
"step": 588
},
{
"epoch": 0.46232339089481944,
"grad_norm": 0.24590060114860535,
"learning_rate": 5.384615384615385e-06,
"loss": 1.1103,
"step": 589
},
{
"epoch": 0.4631083202511774,
"grad_norm": 0.26895543932914734,
"learning_rate": 5.376766091051806e-06,
"loss": 1.0971,
"step": 590
},
{
"epoch": 0.4638932496075353,
"grad_norm": 0.25688987970352173,
"learning_rate": 5.368916797488226e-06,
"loss": 1.0973,
"step": 591
},
{
"epoch": 0.46467817896389324,
"grad_norm": 0.2657226026058197,
"learning_rate": 5.361067503924647e-06,
"loss": 1.124,
"step": 592
},
{
"epoch": 0.4654631083202512,
"grad_norm": 0.2537216544151306,
"learning_rate": 5.353218210361068e-06,
"loss": 1.0958,
"step": 593
},
{
"epoch": 0.4662480376766091,
"grad_norm": 0.32557958364486694,
"learning_rate": 5.345368916797488e-06,
"loss": 1.1469,
"step": 594
},
{
"epoch": 0.46703296703296704,
"grad_norm": 0.25399667024612427,
"learning_rate": 5.33751962323391e-06,
"loss": 1.0587,
"step": 595
},
{
"epoch": 0.46781789638932497,
"grad_norm": 0.26794350147247314,
"learning_rate": 5.32967032967033e-06,
"loss": 1.058,
"step": 596
},
{
"epoch": 0.4686028257456829,
"grad_norm": 0.2695688009262085,
"learning_rate": 5.321821036106752e-06,
"loss": 1.1169,
"step": 597
},
{
"epoch": 0.46938775510204084,
"grad_norm": 0.2774251103401184,
"learning_rate": 5.313971742543171e-06,
"loss": 1.1368,
"step": 598
},
{
"epoch": 0.47017268445839877,
"grad_norm": 0.25040146708488464,
"learning_rate": 5.306122448979593e-06,
"loss": 1.0831,
"step": 599
},
{
"epoch": 0.47095761381475665,
"grad_norm": 0.2619199752807617,
"learning_rate": 5.2982731554160125e-06,
"loss": 1.0354,
"step": 600
},
{
"epoch": 0.4717425431711146,
"grad_norm": 0.25245144963264465,
"learning_rate": 5.290423861852434e-06,
"loss": 1.0487,
"step": 601
},
{
"epoch": 0.4725274725274725,
"grad_norm": 0.25916045904159546,
"learning_rate": 5.2825745682888544e-06,
"loss": 1.0987,
"step": 602
},
{
"epoch": 0.47331240188383045,
"grad_norm": 0.2599029541015625,
"learning_rate": 5.274725274725275e-06,
"loss": 1.0945,
"step": 603
},
{
"epoch": 0.4740973312401884,
"grad_norm": 0.27398571372032166,
"learning_rate": 5.2668759811616956e-06,
"loss": 1.0602,
"step": 604
},
{
"epoch": 0.4748822605965463,
"grad_norm": 0.24853894114494324,
"learning_rate": 5.259026687598117e-06,
"loss": 1.0169,
"step": 605
},
{
"epoch": 0.47566718995290425,
"grad_norm": 0.24999196827411652,
"learning_rate": 5.2511773940345375e-06,
"loss": 1.0557,
"step": 606
},
{
"epoch": 0.4764521193092622,
"grad_norm": 0.2587362229824066,
"learning_rate": 5.243328100470958e-06,
"loss": 1.0832,
"step": 607
},
{
"epoch": 0.4772370486656201,
"grad_norm": 0.255636990070343,
"learning_rate": 5.2354788069073795e-06,
"loss": 1.0378,
"step": 608
},
{
"epoch": 0.47802197802197804,
"grad_norm": 0.2529115080833435,
"learning_rate": 5.227629513343799e-06,
"loss": 1.0544,
"step": 609
},
{
"epoch": 0.478806907378336,
"grad_norm": 0.3072440028190613,
"learning_rate": 5.219780219780221e-06,
"loss": 1.1386,
"step": 610
},
{
"epoch": 0.47959183673469385,
"grad_norm": 0.24107390642166138,
"learning_rate": 5.21193092621664e-06,
"loss": 1.0658,
"step": 611
},
{
"epoch": 0.4803767660910518,
"grad_norm": 0.2693899869918823,
"learning_rate": 5.204081632653062e-06,
"loss": 1.0966,
"step": 612
},
{
"epoch": 0.4811616954474097,
"grad_norm": 0.2495475560426712,
"learning_rate": 5.196232339089482e-06,
"loss": 1.087,
"step": 613
},
{
"epoch": 0.48194662480376765,
"grad_norm": 0.2680012285709381,
"learning_rate": 5.188383045525903e-06,
"loss": 1.092,
"step": 614
},
{
"epoch": 0.4827315541601256,
"grad_norm": 0.2574861943721771,
"learning_rate": 5.180533751962323e-06,
"loss": 1.089,
"step": 615
},
{
"epoch": 0.4835164835164835,
"grad_norm": 0.2980441153049469,
"learning_rate": 5.172684458398745e-06,
"loss": 1.0895,
"step": 616
},
{
"epoch": 0.48430141287284145,
"grad_norm": 0.2533935606479645,
"learning_rate": 5.164835164835166e-06,
"loss": 1.0627,
"step": 617
},
{
"epoch": 0.4850863422291994,
"grad_norm": 0.2828797399997711,
"learning_rate": 5.156985871271586e-06,
"loss": 1.0874,
"step": 618
},
{
"epoch": 0.4858712715855573,
"grad_norm": 0.26118507981300354,
"learning_rate": 5.149136577708007e-06,
"loss": 1.0304,
"step": 619
},
{
"epoch": 0.48665620094191525,
"grad_norm": 0.277045339345932,
"learning_rate": 5.141287284144427e-06,
"loss": 1.0492,
"step": 620
},
{
"epoch": 0.48744113029827313,
"grad_norm": 0.2713971734046936,
"learning_rate": 5.133437990580848e-06,
"loss": 1.1037,
"step": 621
},
{
"epoch": 0.48822605965463106,
"grad_norm": 0.27905189990997314,
"learning_rate": 5.125588697017269e-06,
"loss": 1.0899,
"step": 622
},
{
"epoch": 0.489010989010989,
"grad_norm": 0.2574610710144043,
"learning_rate": 5.1177394034536895e-06,
"loss": 1.0842,
"step": 623
},
{
"epoch": 0.4897959183673469,
"grad_norm": 0.25002968311309814,
"learning_rate": 5.10989010989011e-06,
"loss": 1.0707,
"step": 624
},
{
"epoch": 0.49058084772370486,
"grad_norm": 0.25703486800193787,
"learning_rate": 5.1020408163265315e-06,
"loss": 1.0427,
"step": 625
},
{
"epoch": 0.4913657770800628,
"grad_norm": 0.26097506284713745,
"learning_rate": 5.094191522762951e-06,
"loss": 1.0554,
"step": 626
},
{
"epoch": 0.4921507064364207,
"grad_norm": 0.25719407200813293,
"learning_rate": 5.086342229199373e-06,
"loss": 1.0612,
"step": 627
},
{
"epoch": 0.49293563579277866,
"grad_norm": 0.25571000576019287,
"learning_rate": 5.078492935635794e-06,
"loss": 1.0471,
"step": 628
},
{
"epoch": 0.4937205651491366,
"grad_norm": 0.2634165287017822,
"learning_rate": 5.070643642072214e-06,
"loss": 1.0811,
"step": 629
},
{
"epoch": 0.4945054945054945,
"grad_norm": 0.247939795255661,
"learning_rate": 5.062794348508635e-06,
"loss": 1.0038,
"step": 630
},
{
"epoch": 0.49529042386185246,
"grad_norm": 0.27619796991348267,
"learning_rate": 5.054945054945055e-06,
"loss": 1.1057,
"step": 631
},
{
"epoch": 0.49607535321821034,
"grad_norm": 0.26260074973106384,
"learning_rate": 5.047095761381476e-06,
"loss": 1.1193,
"step": 632
},
{
"epoch": 0.49686028257456827,
"grad_norm": 0.258558988571167,
"learning_rate": 5.039246467817897e-06,
"loss": 1.0687,
"step": 633
},
{
"epoch": 0.4976452119309262,
"grad_norm": 0.26512664556503296,
"learning_rate": 5.031397174254317e-06,
"loss": 1.0561,
"step": 634
},
{
"epoch": 0.49843014128728413,
"grad_norm": 0.2602575421333313,
"learning_rate": 5.023547880690738e-06,
"loss": 1.0438,
"step": 635
},
{
"epoch": 0.49921507064364207,
"grad_norm": 0.2699330151081085,
"learning_rate": 5.015698587127159e-06,
"loss": 1.0989,
"step": 636
},
{
"epoch": 0.5,
"grad_norm": 0.27960050106048584,
"learning_rate": 5.007849293563579e-06,
"loss": 1.0354,
"step": 637
},
{
"epoch": 0.5007849293563579,
"grad_norm": 0.287338525056839,
"learning_rate": 5e-06,
"loss": 1.1538,
"step": 638
},
{
"epoch": 0.5015698587127159,
"grad_norm": 0.26822763681411743,
"learning_rate": 4.992150706436421e-06,
"loss": 1.0374,
"step": 639
},
{
"epoch": 0.5023547880690737,
"grad_norm": 0.2564684748649597,
"learning_rate": 4.9843014128728415e-06,
"loss": 1.0611,
"step": 640
},
{
"epoch": 0.5031397174254317,
"grad_norm": 0.2621293365955353,
"learning_rate": 4.976452119309262e-06,
"loss": 1.0946,
"step": 641
},
{
"epoch": 0.5039246467817896,
"grad_norm": 0.2555181086063385,
"learning_rate": 4.9686028257456835e-06,
"loss": 1.0753,
"step": 642
},
{
"epoch": 0.5047095761381476,
"grad_norm": 0.27201956510543823,
"learning_rate": 4.960753532182104e-06,
"loss": 1.0945,
"step": 643
},
{
"epoch": 0.5054945054945055,
"grad_norm": 0.2534913420677185,
"learning_rate": 4.952904238618525e-06,
"loss": 1.0746,
"step": 644
},
{
"epoch": 0.5062794348508635,
"grad_norm": 0.25126805901527405,
"learning_rate": 4.945054945054946e-06,
"loss": 1.0589,
"step": 645
},
{
"epoch": 0.5070643642072213,
"grad_norm": 0.27167466282844543,
"learning_rate": 4.9372056514913666e-06,
"loss": 1.1345,
"step": 646
},
{
"epoch": 0.5078492935635793,
"grad_norm": 0.25426211953163147,
"learning_rate": 4.929356357927787e-06,
"loss": 1.0779,
"step": 647
},
{
"epoch": 0.5086342229199372,
"grad_norm": 0.2680758237838745,
"learning_rate": 4.921507064364208e-06,
"loss": 1.0908,
"step": 648
},
{
"epoch": 0.5094191522762951,
"grad_norm": 0.26956409215927124,
"learning_rate": 4.913657770800628e-06,
"loss": 1.0492,
"step": 649
},
{
"epoch": 0.5102040816326531,
"grad_norm": 0.2579835057258606,
"learning_rate": 4.905808477237049e-06,
"loss": 1.0819,
"step": 650
},
{
"epoch": 0.510989010989011,
"grad_norm": 0.2698979675769806,
"learning_rate": 4.897959183673469e-06,
"loss": 1.069,
"step": 651
},
{
"epoch": 0.5117739403453689,
"grad_norm": 0.27058646082878113,
"learning_rate": 4.890109890109891e-06,
"loss": 1.0652,
"step": 652
},
{
"epoch": 0.5125588697017268,
"grad_norm": 0.28998565673828125,
"learning_rate": 4.882260596546311e-06,
"loss": 1.1569,
"step": 653
},
{
"epoch": 0.5133437990580848,
"grad_norm": 0.2735849916934967,
"learning_rate": 4.874411302982732e-06,
"loss": 1.084,
"step": 654
},
{
"epoch": 0.5141287284144427,
"grad_norm": 0.2719517648220062,
"learning_rate": 4.866562009419153e-06,
"loss": 1.0997,
"step": 655
},
{
"epoch": 0.5149136577708007,
"grad_norm": 0.2657535672187805,
"learning_rate": 4.858712715855574e-06,
"loss": 1.0977,
"step": 656
},
{
"epoch": 0.5156985871271585,
"grad_norm": 0.2546514570713043,
"learning_rate": 4.850863422291994e-06,
"loss": 1.0897,
"step": 657
},
{
"epoch": 0.5164835164835165,
"grad_norm": 0.2730329632759094,
"learning_rate": 4.843014128728415e-06,
"loss": 1.1082,
"step": 658
},
{
"epoch": 0.5172684458398744,
"grad_norm": 0.2657451331615448,
"learning_rate": 4.8351648351648355e-06,
"loss": 1.0823,
"step": 659
},
{
"epoch": 0.5180533751962323,
"grad_norm": 0.28653398156166077,
"learning_rate": 4.827315541601256e-06,
"loss": 1.0555,
"step": 660
},
{
"epoch": 0.5188383045525903,
"grad_norm": 0.2710787355899811,
"learning_rate": 4.819466248037677e-06,
"loss": 1.0385,
"step": 661
},
{
"epoch": 0.5196232339089482,
"grad_norm": 0.26727956533432007,
"learning_rate": 4.811616954474098e-06,
"loss": 1.068,
"step": 662
},
{
"epoch": 0.5204081632653061,
"grad_norm": 0.25900620222091675,
"learning_rate": 4.8037676609105186e-06,
"loss": 1.0889,
"step": 663
},
{
"epoch": 0.521193092621664,
"grad_norm": 0.26514261960983276,
"learning_rate": 4.795918367346939e-06,
"loss": 1.0978,
"step": 664
},
{
"epoch": 0.521978021978022,
"grad_norm": 0.29534074664115906,
"learning_rate": 4.7880690737833605e-06,
"loss": 1.0289,
"step": 665
},
{
"epoch": 0.5227629513343799,
"grad_norm": 0.2783236801624298,
"learning_rate": 4.780219780219781e-06,
"loss": 1.1002,
"step": 666
},
{
"epoch": 0.5235478806907379,
"grad_norm": 0.2634176015853882,
"learning_rate": 4.772370486656202e-06,
"loss": 1.0518,
"step": 667
},
{
"epoch": 0.5243328100470958,
"grad_norm": 0.27170518040657043,
"learning_rate": 4.764521193092622e-06,
"loss": 1.0823,
"step": 668
},
{
"epoch": 0.5251177394034537,
"grad_norm": 0.26880884170532227,
"learning_rate": 4.756671899529043e-06,
"loss": 1.0384,
"step": 669
},
{
"epoch": 0.5259026687598116,
"grad_norm": 0.27249330282211304,
"learning_rate": 4.748822605965463e-06,
"loss": 1.0706,
"step": 670
},
{
"epoch": 0.5266875981161695,
"grad_norm": 0.2680225074291229,
"learning_rate": 4.740973312401884e-06,
"loss": 1.0794,
"step": 671
},
{
"epoch": 0.5274725274725275,
"grad_norm": 0.27644068002700806,
"learning_rate": 4.733124018838305e-06,
"loss": 1.1037,
"step": 672
},
{
"epoch": 0.5282574568288854,
"grad_norm": 0.2654259204864502,
"learning_rate": 4.725274725274726e-06,
"loss": 1.0524,
"step": 673
},
{
"epoch": 0.5290423861852434,
"grad_norm": 0.2548593282699585,
"learning_rate": 4.717425431711146e-06,
"loss": 1.0596,
"step": 674
},
{
"epoch": 0.5298273155416012,
"grad_norm": 0.262279748916626,
"learning_rate": 4.709576138147567e-06,
"loss": 1.0731,
"step": 675
},
{
"epoch": 0.5306122448979592,
"grad_norm": 0.3365892767906189,
"learning_rate": 4.701726844583988e-06,
"loss": 1.1082,
"step": 676
},
{
"epoch": 0.5313971742543171,
"grad_norm": 0.25070279836654663,
"learning_rate": 4.693877551020409e-06,
"loss": 1.0554,
"step": 677
},
{
"epoch": 0.5321821036106751,
"grad_norm": 0.3416726291179657,
"learning_rate": 4.6860282574568294e-06,
"loss": 1.0501,
"step": 678
},
{
"epoch": 0.532967032967033,
"grad_norm": 0.2714819610118866,
"learning_rate": 4.67817896389325e-06,
"loss": 1.0317,
"step": 679
},
{
"epoch": 0.533751962323391,
"grad_norm": 0.2663348615169525,
"learning_rate": 4.6703296703296706e-06,
"loss": 1.0699,
"step": 680
},
{
"epoch": 0.5345368916797488,
"grad_norm": 0.26617753505706787,
"learning_rate": 4.662480376766091e-06,
"loss": 1.0755,
"step": 681
},
{
"epoch": 0.5353218210361067,
"grad_norm": 0.25647595524787903,
"learning_rate": 4.6546310832025125e-06,
"loss": 1.0746,
"step": 682
},
{
"epoch": 0.5361067503924647,
"grad_norm": 0.2799610197544098,
"learning_rate": 4.646781789638933e-06,
"loss": 1.0977,
"step": 683
},
{
"epoch": 0.5368916797488226,
"grad_norm": 0.26751118898391724,
"learning_rate": 4.638932496075354e-06,
"loss": 1.0633,
"step": 684
},
{
"epoch": 0.5376766091051806,
"grad_norm": 0.28023260831832886,
"learning_rate": 4.631083202511774e-06,
"loss": 1.0927,
"step": 685
},
{
"epoch": 0.5384615384615384,
"grad_norm": 0.27010107040405273,
"learning_rate": 4.623233908948195e-06,
"loss": 1.0869,
"step": 686
},
{
"epoch": 0.5392464678178964,
"grad_norm": 0.2625042796134949,
"learning_rate": 4.615384615384616e-06,
"loss": 1.0615,
"step": 687
},
{
"epoch": 0.5400313971742543,
"grad_norm": 0.2663073241710663,
"learning_rate": 4.607535321821037e-06,
"loss": 1.0473,
"step": 688
},
{
"epoch": 0.5408163265306123,
"grad_norm": 0.26662012934684753,
"learning_rate": 4.599686028257457e-06,
"loss": 1.0611,
"step": 689
},
{
"epoch": 0.5416012558869702,
"grad_norm": 0.2595308721065521,
"learning_rate": 4.591836734693878e-06,
"loss": 1.0645,
"step": 690
},
{
"epoch": 0.542386185243328,
"grad_norm": 0.286700040102005,
"learning_rate": 4.583987441130298e-06,
"loss": 1.0211,
"step": 691
},
{
"epoch": 0.543171114599686,
"grad_norm": 0.2713211476802826,
"learning_rate": 4.57613814756672e-06,
"loss": 1.0621,
"step": 692
},
{
"epoch": 0.5439560439560439,
"grad_norm": 0.2682141363620758,
"learning_rate": 4.56828885400314e-06,
"loss": 1.0571,
"step": 693
},
{
"epoch": 0.5447409733124019,
"grad_norm": 0.2777917683124542,
"learning_rate": 4.560439560439561e-06,
"loss": 1.0733,
"step": 694
},
{
"epoch": 0.5455259026687598,
"grad_norm": 0.25854945182800293,
"learning_rate": 4.5525902668759815e-06,
"loss": 1.0926,
"step": 695
},
{
"epoch": 0.5463108320251178,
"grad_norm": 0.2838626801967621,
"learning_rate": 4.544740973312402e-06,
"loss": 1.0707,
"step": 696
},
{
"epoch": 0.5470957613814756,
"grad_norm": 0.26979881525039673,
"learning_rate": 4.5368916797488226e-06,
"loss": 1.0949,
"step": 697
},
{
"epoch": 0.5478806907378336,
"grad_norm": 0.2945154309272766,
"learning_rate": 4.529042386185244e-06,
"loss": 1.1497,
"step": 698
},
{
"epoch": 0.5486656200941915,
"grad_norm": 0.25463616847991943,
"learning_rate": 4.5211930926216645e-06,
"loss": 1.0762,
"step": 699
},
{
"epoch": 0.5494505494505495,
"grad_norm": 0.2613489031791687,
"learning_rate": 4.513343799058085e-06,
"loss": 1.0629,
"step": 700
},
{
"epoch": 0.5502354788069074,
"grad_norm": 0.2718147039413452,
"learning_rate": 4.505494505494506e-06,
"loss": 1.1294,
"step": 701
},
{
"epoch": 0.5510204081632653,
"grad_norm": 0.2775886654853821,
"learning_rate": 4.497645211930927e-06,
"loss": 1.0303,
"step": 702
},
{
"epoch": 0.5518053375196232,
"grad_norm": 0.2916508913040161,
"learning_rate": 4.489795918367348e-06,
"loss": 1.0797,
"step": 703
},
{
"epoch": 0.5525902668759811,
"grad_norm": 0.2996635138988495,
"learning_rate": 4.481946624803768e-06,
"loss": 1.0696,
"step": 704
},
{
"epoch": 0.5533751962323391,
"grad_norm": 0.31456199288368225,
"learning_rate": 4.474097331240189e-06,
"loss": 1.0751,
"step": 705
},
{
"epoch": 0.554160125588697,
"grad_norm": 0.26043495535850525,
"learning_rate": 4.466248037676609e-06,
"loss": 1.0588,
"step": 706
},
{
"epoch": 0.554945054945055,
"grad_norm": 0.27495190501213074,
"learning_rate": 4.45839874411303e-06,
"loss": 1.0479,
"step": 707
},
{
"epoch": 0.5557299843014128,
"grad_norm": 0.2717645764350891,
"learning_rate": 4.45054945054945e-06,
"loss": 1.0718,
"step": 708
},
{
"epoch": 0.5565149136577708,
"grad_norm": 0.29588571190834045,
"learning_rate": 4.442700156985872e-06,
"loss": 1.0585,
"step": 709
},
{
"epoch": 0.5572998430141287,
"grad_norm": 0.2885316014289856,
"learning_rate": 4.434850863422292e-06,
"loss": 1.0565,
"step": 710
},
{
"epoch": 0.5580847723704867,
"grad_norm": 0.2899274230003357,
"learning_rate": 4.427001569858713e-06,
"loss": 1.0994,
"step": 711
},
{
"epoch": 0.5588697017268446,
"grad_norm": 0.27042970061302185,
"learning_rate": 4.419152276295134e-06,
"loss": 1.0642,
"step": 712
},
{
"epoch": 0.5596546310832025,
"grad_norm": 0.2861323654651642,
"learning_rate": 4.411302982731555e-06,
"loss": 1.0588,
"step": 713
},
{
"epoch": 0.5604395604395604,
"grad_norm": 0.2717403471469879,
"learning_rate": 4.403453689167975e-06,
"loss": 1.0684,
"step": 714
},
{
"epoch": 0.5612244897959183,
"grad_norm": 0.2660480737686157,
"learning_rate": 4.395604395604396e-06,
"loss": 1.0709,
"step": 715
},
{
"epoch": 0.5620094191522763,
"grad_norm": 0.27813735604286194,
"learning_rate": 4.3877551020408165e-06,
"loss": 1.0424,
"step": 716
},
{
"epoch": 0.5627943485086342,
"grad_norm": 0.27436062693595886,
"learning_rate": 4.379905808477237e-06,
"loss": 1.118,
"step": 717
},
{
"epoch": 0.5635792778649922,
"grad_norm": 0.2999591827392578,
"learning_rate": 4.372056514913658e-06,
"loss": 1.0542,
"step": 718
},
{
"epoch": 0.5643642072213501,
"grad_norm": 0.2673451006412506,
"learning_rate": 4.364207221350079e-06,
"loss": 1.0464,
"step": 719
},
{
"epoch": 0.565149136577708,
"grad_norm": 0.27569347620010376,
"learning_rate": 4.3563579277865e-06,
"loss": 1.1031,
"step": 720
},
{
"epoch": 0.5659340659340659,
"grad_norm": 0.30383068323135376,
"learning_rate": 4.34850863422292e-06,
"loss": 1.145,
"step": 721
},
{
"epoch": 0.5667189952904239,
"grad_norm": 0.2751275300979614,
"learning_rate": 4.340659340659341e-06,
"loss": 1.0859,
"step": 722
},
{
"epoch": 0.5675039246467818,
"grad_norm": 0.27169832587242126,
"learning_rate": 4.332810047095762e-06,
"loss": 1.0594,
"step": 723
},
{
"epoch": 0.5682888540031397,
"grad_norm": 0.28917625546455383,
"learning_rate": 4.324960753532183e-06,
"loss": 1.0823,
"step": 724
},
{
"epoch": 0.5690737833594977,
"grad_norm": 0.2915303409099579,
"learning_rate": 4.317111459968603e-06,
"loss": 1.0867,
"step": 725
},
{
"epoch": 0.5698587127158555,
"grad_norm": 0.2857419550418854,
"learning_rate": 4.309262166405024e-06,
"loss": 1.0843,
"step": 726
},
{
"epoch": 0.5706436420722135,
"grad_norm": 0.2684113085269928,
"learning_rate": 4.301412872841444e-06,
"loss": 1.0595,
"step": 727
},
{
"epoch": 0.5714285714285714,
"grad_norm": 0.2712114155292511,
"learning_rate": 4.293563579277865e-06,
"loss": 1.0989,
"step": 728
},
{
"epoch": 0.5722135007849294,
"grad_norm": 0.2832731604576111,
"learning_rate": 4.2857142857142855e-06,
"loss": 1.0498,
"step": 729
},
{
"epoch": 0.5729984301412873,
"grad_norm": 0.44053250551223755,
"learning_rate": 4.277864992150707e-06,
"loss": 1.0979,
"step": 730
},
{
"epoch": 0.5737833594976453,
"grad_norm": 0.27188801765441895,
"learning_rate": 4.270015698587127e-06,
"loss": 1.0941,
"step": 731
},
{
"epoch": 0.5745682888540031,
"grad_norm": 0.28184184432029724,
"learning_rate": 4.262166405023548e-06,
"loss": 1.1324,
"step": 732
},
{
"epoch": 0.5753532182103611,
"grad_norm": 0.27184492349624634,
"learning_rate": 4.254317111459969e-06,
"loss": 1.0883,
"step": 733
},
{
"epoch": 0.576138147566719,
"grad_norm": 0.2704511284828186,
"learning_rate": 4.24646781789639e-06,
"loss": 1.0746,
"step": 734
},
{
"epoch": 0.5769230769230769,
"grad_norm": 0.2777252793312073,
"learning_rate": 4.2386185243328105e-06,
"loss": 1.0202,
"step": 735
},
{
"epoch": 0.5777080062794349,
"grad_norm": 0.279325932264328,
"learning_rate": 4.230769230769231e-06,
"loss": 1.0334,
"step": 736
},
{
"epoch": 0.5784929356357927,
"grad_norm": 0.3658686876296997,
"learning_rate": 4.222919937205652e-06,
"loss": 1.0314,
"step": 737
},
{
"epoch": 0.5792778649921507,
"grad_norm": 0.2697162926197052,
"learning_rate": 4.215070643642072e-06,
"loss": 1.0703,
"step": 738
},
{
"epoch": 0.5800627943485086,
"grad_norm": 0.27418413758277893,
"learning_rate": 4.207221350078493e-06,
"loss": 1.0345,
"step": 739
},
{
"epoch": 0.5808477237048666,
"grad_norm": 0.2849864065647125,
"learning_rate": 4.199372056514914e-06,
"loss": 1.0609,
"step": 740
},
{
"epoch": 0.5816326530612245,
"grad_norm": 0.29697927832603455,
"learning_rate": 4.191522762951335e-06,
"loss": 1.0578,
"step": 741
},
{
"epoch": 0.5824175824175825,
"grad_norm": 0.2827671766281128,
"learning_rate": 4.183673469387755e-06,
"loss": 1.0264,
"step": 742
},
{
"epoch": 0.5832025117739403,
"grad_norm": 0.26230207085609436,
"learning_rate": 4.175824175824177e-06,
"loss": 1.0614,
"step": 743
},
{
"epoch": 0.5839874411302983,
"grad_norm": 0.28049588203430176,
"learning_rate": 4.167974882260597e-06,
"loss": 1.0784,
"step": 744
},
{
"epoch": 0.5847723704866562,
"grad_norm": 0.2954070270061493,
"learning_rate": 4.160125588697018e-06,
"loss": 1.1016,
"step": 745
},
{
"epoch": 0.5855572998430141,
"grad_norm": 0.3010290861129761,
"learning_rate": 4.152276295133438e-06,
"loss": 1.0409,
"step": 746
},
{
"epoch": 0.5863422291993721,
"grad_norm": 0.26996880769729614,
"learning_rate": 4.144427001569859e-06,
"loss": 1.0828,
"step": 747
},
{
"epoch": 0.5871271585557299,
"grad_norm": 0.27959656715393066,
"learning_rate": 4.1365777080062794e-06,
"loss": 0.9985,
"step": 748
},
{
"epoch": 0.5879120879120879,
"grad_norm": 0.26965901255607605,
"learning_rate": 4.1287284144427e-06,
"loss": 1.0649,
"step": 749
},
{
"epoch": 0.5886970172684458,
"grad_norm": 0.2751823365688324,
"learning_rate": 4.120879120879121e-06,
"loss": 1.0422,
"step": 750
},
{
"epoch": 0.5894819466248038,
"grad_norm": 0.27731597423553467,
"learning_rate": 4.113029827315542e-06,
"loss": 1.0927,
"step": 751
},
{
"epoch": 0.5902668759811617,
"grad_norm": 0.2711530029773712,
"learning_rate": 4.1051805337519625e-06,
"loss": 1.055,
"step": 752
},
{
"epoch": 0.5910518053375197,
"grad_norm": 0.28580254316329956,
"learning_rate": 4.097331240188384e-06,
"loss": 1.0859,
"step": 753
},
{
"epoch": 0.5918367346938775,
"grad_norm": 0.2740454077720642,
"learning_rate": 4.0894819466248045e-06,
"loss": 1.0285,
"step": 754
},
{
"epoch": 0.5926216640502355,
"grad_norm": 0.28732219338417053,
"learning_rate": 4.081632653061225e-06,
"loss": 1.0953,
"step": 755
},
{
"epoch": 0.5934065934065934,
"grad_norm": 0.2803926467895508,
"learning_rate": 4.0737833594976456e-06,
"loss": 1.0135,
"step": 756
},
{
"epoch": 0.5941915227629513,
"grad_norm": 0.2755890488624573,
"learning_rate": 4.065934065934066e-06,
"loss": 1.0422,
"step": 757
},
{
"epoch": 0.5949764521193093,
"grad_norm": 0.28653979301452637,
"learning_rate": 4.058084772370487e-06,
"loss": 1.0301,
"step": 758
},
{
"epoch": 0.5957613814756672,
"grad_norm": 0.2685067355632782,
"learning_rate": 4.050235478806907e-06,
"loss": 1.03,
"step": 759
},
{
"epoch": 0.5965463108320251,
"grad_norm": 0.2731209397315979,
"learning_rate": 4.042386185243329e-06,
"loss": 1.0346,
"step": 760
},
{
"epoch": 0.597331240188383,
"grad_norm": 0.29851043224334717,
"learning_rate": 4.034536891679749e-06,
"loss": 1.0961,
"step": 761
},
{
"epoch": 0.598116169544741,
"grad_norm": 0.28820693492889404,
"learning_rate": 4.02668759811617e-06,
"loss": 1.0942,
"step": 762
},
{
"epoch": 0.5989010989010989,
"grad_norm": 0.28037169575691223,
"learning_rate": 4.018838304552591e-06,
"loss": 1.0647,
"step": 763
},
{
"epoch": 0.5996860282574569,
"grad_norm": 0.27676883339881897,
"learning_rate": 4.010989010989012e-06,
"loss": 1.0605,
"step": 764
},
{
"epoch": 0.6004709576138147,
"grad_norm": 0.274884432554245,
"learning_rate": 4.003139717425432e-06,
"loss": 1.0577,
"step": 765
},
{
"epoch": 0.6012558869701727,
"grad_norm": 0.2712654173374176,
"learning_rate": 3.995290423861853e-06,
"loss": 1.0871,
"step": 766
},
{
"epoch": 0.6020408163265306,
"grad_norm": 0.2694210708141327,
"learning_rate": 3.987441130298273e-06,
"loss": 1.0412,
"step": 767
},
{
"epoch": 0.6028257456828885,
"grad_norm": 0.2664571702480316,
"learning_rate": 3.979591836734694e-06,
"loss": 1.0413,
"step": 768
},
{
"epoch": 0.6036106750392465,
"grad_norm": 0.2823816239833832,
"learning_rate": 3.9717425431711145e-06,
"loss": 1.0511,
"step": 769
},
{
"epoch": 0.6043956043956044,
"grad_norm": 0.27906733751296997,
"learning_rate": 3.963893249607536e-06,
"loss": 1.059,
"step": 770
},
{
"epoch": 0.6051805337519623,
"grad_norm": 0.26882991194725037,
"learning_rate": 3.9560439560439565e-06,
"loss": 1.0445,
"step": 771
},
{
"epoch": 0.6059654631083202,
"grad_norm": 0.273629754781723,
"learning_rate": 3.948194662480377e-06,
"loss": 1.0566,
"step": 772
},
{
"epoch": 0.6067503924646782,
"grad_norm": 0.28593695163726807,
"learning_rate": 3.940345368916798e-06,
"loss": 1.0857,
"step": 773
},
{
"epoch": 0.6075353218210361,
"grad_norm": 0.2721453309059143,
"learning_rate": 3.932496075353219e-06,
"loss": 1.0474,
"step": 774
},
{
"epoch": 0.6083202511773941,
"grad_norm": 0.2677747905254364,
"learning_rate": 3.9246467817896395e-06,
"loss": 1.0293,
"step": 775
},
{
"epoch": 0.609105180533752,
"grad_norm": 0.27768194675445557,
"learning_rate": 3.91679748822606e-06,
"loss": 1.0645,
"step": 776
},
{
"epoch": 0.6098901098901099,
"grad_norm": 0.2910935878753662,
"learning_rate": 3.908948194662481e-06,
"loss": 1.122,
"step": 777
},
{
"epoch": 0.6106750392464678,
"grad_norm": 0.2711617946624756,
"learning_rate": 3.901098901098901e-06,
"loss": 1.0663,
"step": 778
},
{
"epoch": 0.6114599686028257,
"grad_norm": 0.2801941931247711,
"learning_rate": 3.893249607535322e-06,
"loss": 1.057,
"step": 779
},
{
"epoch": 0.6122448979591837,
"grad_norm": 0.2923711836338043,
"learning_rate": 3.885400313971743e-06,
"loss": 1.1437,
"step": 780
},
{
"epoch": 0.6130298273155416,
"grad_norm": 0.2791869342327118,
"learning_rate": 3.877551020408164e-06,
"loss": 1.0828,
"step": 781
},
{
"epoch": 0.6138147566718996,
"grad_norm": 0.34363803267478943,
"learning_rate": 3.869701726844584e-06,
"loss": 1.0993,
"step": 782
},
{
"epoch": 0.6145996860282574,
"grad_norm": 0.2742525041103363,
"learning_rate": 3.861852433281005e-06,
"loss": 1.0626,
"step": 783
},
{
"epoch": 0.6153846153846154,
"grad_norm": 0.2778567671775818,
"learning_rate": 3.854003139717426e-06,
"loss": 1.0768,
"step": 784
},
{
"epoch": 0.6161695447409733,
"grad_norm": 0.274810791015625,
"learning_rate": 3.846153846153847e-06,
"loss": 1.07,
"step": 785
},
{
"epoch": 0.6169544740973313,
"grad_norm": 0.33274561166763306,
"learning_rate": 3.838304552590267e-06,
"loss": 1.0368,
"step": 786
},
{
"epoch": 0.6177394034536892,
"grad_norm": 0.2886803448200226,
"learning_rate": 3.830455259026688e-06,
"loss": 1.0458,
"step": 787
},
{
"epoch": 0.6185243328100472,
"grad_norm": 0.3283863961696625,
"learning_rate": 3.8226059654631085e-06,
"loss": 1.0733,
"step": 788
},
{
"epoch": 0.619309262166405,
"grad_norm": 0.30149760842323303,
"learning_rate": 3.814756671899529e-06,
"loss": 1.09,
"step": 789
},
{
"epoch": 0.6200941915227629,
"grad_norm": 0.2996380627155304,
"learning_rate": 3.80690737833595e-06,
"loss": 1.0629,
"step": 790
},
{
"epoch": 0.6208791208791209,
"grad_norm": 0.3076927661895752,
"learning_rate": 3.7990580847723706e-06,
"loss": 1.051,
"step": 791
},
{
"epoch": 0.6216640502354788,
"grad_norm": 0.27474308013916016,
"learning_rate": 3.7912087912087915e-06,
"loss": 1.0259,
"step": 792
},
{
"epoch": 0.6224489795918368,
"grad_norm": 0.2838291823863983,
"learning_rate": 3.783359497645212e-06,
"loss": 1.0489,
"step": 793
},
{
"epoch": 0.6232339089481946,
"grad_norm": 0.31608259677886963,
"learning_rate": 3.7755102040816327e-06,
"loss": 1.0994,
"step": 794
},
{
"epoch": 0.6240188383045526,
"grad_norm": 0.29825273156166077,
"learning_rate": 3.767660910518054e-06,
"loss": 1.0947,
"step": 795
},
{
"epoch": 0.6248037676609105,
"grad_norm": 0.28364327549934387,
"learning_rate": 3.7598116169544746e-06,
"loss": 1.0869,
"step": 796
},
{
"epoch": 0.6255886970172685,
"grad_norm": 0.2713553011417389,
"learning_rate": 3.751962323390895e-06,
"loss": 1.0656,
"step": 797
},
{
"epoch": 0.6263736263736264,
"grad_norm": 0.27098798751831055,
"learning_rate": 3.744113029827316e-06,
"loss": 1.073,
"step": 798
},
{
"epoch": 0.6271585557299842,
"grad_norm": 0.28046950697898865,
"learning_rate": 3.7362637362637367e-06,
"loss": 1.0539,
"step": 799
},
{
"epoch": 0.6279434850863422,
"grad_norm": 0.2658367156982422,
"learning_rate": 3.7284144427001573e-06,
"loss": 1.0596,
"step": 800
},
{
"epoch": 0.6287284144427001,
"grad_norm": 0.2806791365146637,
"learning_rate": 3.720565149136578e-06,
"loss": 1.0867,
"step": 801
},
{
"epoch": 0.6295133437990581,
"grad_norm": 0.27192607522010803,
"learning_rate": 3.712715855572999e-06,
"loss": 1.0323,
"step": 802
},
{
"epoch": 0.630298273155416,
"grad_norm": 0.29174211621284485,
"learning_rate": 3.7048665620094194e-06,
"loss": 1.0436,
"step": 803
},
{
"epoch": 0.631083202511774,
"grad_norm": 0.2949182093143463,
"learning_rate": 3.69701726844584e-06,
"loss": 1.0729,
"step": 804
},
{
"epoch": 0.6318681318681318,
"grad_norm": 0.28802135586738586,
"learning_rate": 3.6891679748822605e-06,
"loss": 1.0826,
"step": 805
},
{
"epoch": 0.6326530612244898,
"grad_norm": 0.27609679102897644,
"learning_rate": 3.681318681318682e-06,
"loss": 1.0617,
"step": 806
},
{
"epoch": 0.6334379905808477,
"grad_norm": 0.30500441789627075,
"learning_rate": 3.6734693877551024e-06,
"loss": 1.0943,
"step": 807
},
{
"epoch": 0.6342229199372057,
"grad_norm": 0.2916868329048157,
"learning_rate": 3.6656200941915234e-06,
"loss": 1.1224,
"step": 808
},
{
"epoch": 0.6350078492935636,
"grad_norm": 0.3102125823497772,
"learning_rate": 3.657770800627944e-06,
"loss": 1.1176,
"step": 809
},
{
"epoch": 0.6357927786499215,
"grad_norm": 0.2802576422691345,
"learning_rate": 3.6499215070643645e-06,
"loss": 1.0794,
"step": 810
},
{
"epoch": 0.6365777080062794,
"grad_norm": 0.3083432912826538,
"learning_rate": 3.642072213500785e-06,
"loss": 1.1249,
"step": 811
},
{
"epoch": 0.6373626373626373,
"grad_norm": 0.30711638927459717,
"learning_rate": 3.634222919937206e-06,
"loss": 1.0287,
"step": 812
},
{
"epoch": 0.6381475667189953,
"grad_norm": 0.3342186510562897,
"learning_rate": 3.6263736263736266e-06,
"loss": 1.0822,
"step": 813
},
{
"epoch": 0.6389324960753532,
"grad_norm": 0.27452296018600464,
"learning_rate": 3.618524332810047e-06,
"loss": 1.0322,
"step": 814
},
{
"epoch": 0.6397174254317112,
"grad_norm": 0.2835961580276489,
"learning_rate": 3.6106750392464677e-06,
"loss": 1.0897,
"step": 815
},
{
"epoch": 0.640502354788069,
"grad_norm": 0.27237561345100403,
"learning_rate": 3.6028257456828887e-06,
"loss": 1.0461,
"step": 816
},
{
"epoch": 0.641287284144427,
"grad_norm": 0.31648552417755127,
"learning_rate": 3.5949764521193097e-06,
"loss": 1.0393,
"step": 817
},
{
"epoch": 0.6420722135007849,
"grad_norm": 0.27146708965301514,
"learning_rate": 3.5871271585557307e-06,
"loss": 1.0706,
"step": 818
},
{
"epoch": 0.6428571428571429,
"grad_norm": 0.2728872299194336,
"learning_rate": 3.5792778649921512e-06,
"loss": 1.0361,
"step": 819
},
{
"epoch": 0.6436420722135008,
"grad_norm": 0.2697795033454895,
"learning_rate": 3.5714285714285718e-06,
"loss": 1.0238,
"step": 820
},
{
"epoch": 0.6444270015698587,
"grad_norm": 0.3166142702102661,
"learning_rate": 3.5635792778649923e-06,
"loss": 1.1152,
"step": 821
},
{
"epoch": 0.6452119309262166,
"grad_norm": 0.3062928020954132,
"learning_rate": 3.5557299843014133e-06,
"loss": 1.1208,
"step": 822
},
{
"epoch": 0.6459968602825745,
"grad_norm": 0.3098381757736206,
"learning_rate": 3.547880690737834e-06,
"loss": 1.1239,
"step": 823
},
{
"epoch": 0.6467817896389325,
"grad_norm": 0.2634499669075012,
"learning_rate": 3.5400313971742544e-06,
"loss": 1.0154,
"step": 824
},
{
"epoch": 0.6475667189952904,
"grad_norm": 0.27799683809280396,
"learning_rate": 3.532182103610675e-06,
"loss": 1.0574,
"step": 825
},
{
"epoch": 0.6483516483516484,
"grad_norm": 0.29064470529556274,
"learning_rate": 3.524332810047096e-06,
"loss": 1.0962,
"step": 826
},
{
"epoch": 0.6491365777080063,
"grad_norm": 0.3558158874511719,
"learning_rate": 3.516483516483517e-06,
"loss": 1.1029,
"step": 827
},
{
"epoch": 0.6499215070643642,
"grad_norm": 0.3792993426322937,
"learning_rate": 3.5086342229199375e-06,
"loss": 1.0777,
"step": 828
},
{
"epoch": 0.6507064364207221,
"grad_norm": 0.29269522428512573,
"learning_rate": 3.5007849293563585e-06,
"loss": 1.0245,
"step": 829
},
{
"epoch": 0.6514913657770801,
"grad_norm": 0.28923287987709045,
"learning_rate": 3.492935635792779e-06,
"loss": 1.0701,
"step": 830
},
{
"epoch": 0.652276295133438,
"grad_norm": 0.285043865442276,
"learning_rate": 3.4850863422291996e-06,
"loss": 1.0538,
"step": 831
},
{
"epoch": 0.6530612244897959,
"grad_norm": 0.2893431782722473,
"learning_rate": 3.4772370486656206e-06,
"loss": 1.038,
"step": 832
},
{
"epoch": 0.6538461538461539,
"grad_norm": 0.28342151641845703,
"learning_rate": 3.469387755102041e-06,
"loss": 1.0341,
"step": 833
},
{
"epoch": 0.6546310832025117,
"grad_norm": 0.27439647912979126,
"learning_rate": 3.4615384615384617e-06,
"loss": 1.0807,
"step": 834
},
{
"epoch": 0.6554160125588697,
"grad_norm": 0.283348023891449,
"learning_rate": 3.4536891679748822e-06,
"loss": 1.0321,
"step": 835
},
{
"epoch": 0.6562009419152276,
"grad_norm": 0.3052699565887451,
"learning_rate": 3.4458398744113032e-06,
"loss": 1.0977,
"step": 836
},
{
"epoch": 0.6569858712715856,
"grad_norm": 0.30584266781806946,
"learning_rate": 3.4379905808477238e-06,
"loss": 1.0312,
"step": 837
},
{
"epoch": 0.6577708006279435,
"grad_norm": 0.2961052656173706,
"learning_rate": 3.4301412872841448e-06,
"loss": 1.064,
"step": 838
},
{
"epoch": 0.6585557299843015,
"grad_norm": 0.32309481501579285,
"learning_rate": 3.4222919937205657e-06,
"loss": 1.087,
"step": 839
},
{
"epoch": 0.6593406593406593,
"grad_norm": 0.2820388674736023,
"learning_rate": 3.4144427001569863e-06,
"loss": 1.062,
"step": 840
},
{
"epoch": 0.6601255886970173,
"grad_norm": 0.31144237518310547,
"learning_rate": 3.406593406593407e-06,
"loss": 1.0137,
"step": 841
},
{
"epoch": 0.6609105180533752,
"grad_norm": 0.2903454303741455,
"learning_rate": 3.398744113029828e-06,
"loss": 1.0571,
"step": 842
},
{
"epoch": 0.6616954474097331,
"grad_norm": 0.29997384548187256,
"learning_rate": 3.3908948194662484e-06,
"loss": 1.0569,
"step": 843
},
{
"epoch": 0.6624803767660911,
"grad_norm": 0.34617769718170166,
"learning_rate": 3.383045525902669e-06,
"loss": 1.0925,
"step": 844
},
{
"epoch": 0.6632653061224489,
"grad_norm": 0.2661650776863098,
"learning_rate": 3.3751962323390895e-06,
"loss": 1.0299,
"step": 845
},
{
"epoch": 0.6640502354788069,
"grad_norm": 0.2766907215118408,
"learning_rate": 3.3673469387755105e-06,
"loss": 1.0807,
"step": 846
},
{
"epoch": 0.6648351648351648,
"grad_norm": 0.2823966145515442,
"learning_rate": 3.359497645211931e-06,
"loss": 1.0796,
"step": 847
},
{
"epoch": 0.6656200941915228,
"grad_norm": 0.32514306902885437,
"learning_rate": 3.3516483516483516e-06,
"loss": 1.1211,
"step": 848
},
{
"epoch": 0.6664050235478807,
"grad_norm": 0.3008269965648651,
"learning_rate": 3.343799058084773e-06,
"loss": 1.0751,
"step": 849
},
{
"epoch": 0.6671899529042387,
"grad_norm": 0.30757200717926025,
"learning_rate": 3.3359497645211936e-06,
"loss": 1.1073,
"step": 850
},
{
"epoch": 0.6679748822605965,
"grad_norm": 0.2902880609035492,
"learning_rate": 3.328100470957614e-06,
"loss": 1.0854,
"step": 851
},
{
"epoch": 0.6687598116169545,
"grad_norm": 0.2838514447212219,
"learning_rate": 3.320251177394035e-06,
"loss": 1.0474,
"step": 852
},
{
"epoch": 0.6695447409733124,
"grad_norm": 0.30020883679389954,
"learning_rate": 3.3124018838304557e-06,
"loss": 1.0338,
"step": 853
},
{
"epoch": 0.6703296703296703,
"grad_norm": 0.29149070382118225,
"learning_rate": 3.304552590266876e-06,
"loss": 1.0447,
"step": 854
},
{
"epoch": 0.6711145996860283,
"grad_norm": 0.2783101201057434,
"learning_rate": 3.2967032967032968e-06,
"loss": 1.0215,
"step": 855
},
{
"epoch": 0.6718995290423861,
"grad_norm": 0.2824500501155853,
"learning_rate": 3.2888540031397177e-06,
"loss": 1.002,
"step": 856
},
{
"epoch": 0.6726844583987441,
"grad_norm": 0.2815590798854828,
"learning_rate": 3.2810047095761383e-06,
"loss": 1.1188,
"step": 857
},
{
"epoch": 0.673469387755102,
"grad_norm": 0.2877782881259918,
"learning_rate": 3.273155416012559e-06,
"loss": 1.0294,
"step": 858
},
{
"epoch": 0.67425431711146,
"grad_norm": 0.27774369716644287,
"learning_rate": 3.2653061224489794e-06,
"loss": 1.0293,
"step": 859
},
{
"epoch": 0.6750392464678179,
"grad_norm": 0.3431270122528076,
"learning_rate": 3.257456828885401e-06,
"loss": 1.072,
"step": 860
},
{
"epoch": 0.6758241758241759,
"grad_norm": 0.3327620029449463,
"learning_rate": 3.2496075353218214e-06,
"loss": 1.095,
"step": 861
},
{
"epoch": 0.6766091051805337,
"grad_norm": 0.288352370262146,
"learning_rate": 3.2417582417582424e-06,
"loss": 1.0711,
"step": 862
},
{
"epoch": 0.6773940345368917,
"grad_norm": 0.2850242257118225,
"learning_rate": 3.233908948194663e-06,
"loss": 1.0785,
"step": 863
},
{
"epoch": 0.6781789638932496,
"grad_norm": 0.2831905782222748,
"learning_rate": 3.2260596546310835e-06,
"loss": 1.0341,
"step": 864
},
{
"epoch": 0.6789638932496075,
"grad_norm": 0.284157931804657,
"learning_rate": 3.218210361067504e-06,
"loss": 1.0639,
"step": 865
},
{
"epoch": 0.6797488226059655,
"grad_norm": 0.27813270688056946,
"learning_rate": 3.210361067503925e-06,
"loss": 1.0721,
"step": 866
},
{
"epoch": 0.6805337519623234,
"grad_norm": 0.28956329822540283,
"learning_rate": 3.2025117739403456e-06,
"loss": 1.0826,
"step": 867
},
{
"epoch": 0.6813186813186813,
"grad_norm": 0.29287075996398926,
"learning_rate": 3.194662480376766e-06,
"loss": 1.1073,
"step": 868
},
{
"epoch": 0.6821036106750392,
"grad_norm": 0.2984355092048645,
"learning_rate": 3.1868131868131867e-06,
"loss": 1.0131,
"step": 869
},
{
"epoch": 0.6828885400313972,
"grad_norm": 0.2821354269981384,
"learning_rate": 3.1789638932496077e-06,
"loss": 1.0452,
"step": 870
},
{
"epoch": 0.6836734693877551,
"grad_norm": 0.27414095401763916,
"learning_rate": 3.1711145996860286e-06,
"loss": 1.0228,
"step": 871
},
{
"epoch": 0.6844583987441131,
"grad_norm": 0.2787257730960846,
"learning_rate": 3.1632653061224496e-06,
"loss": 1.0317,
"step": 872
},
{
"epoch": 0.685243328100471,
"grad_norm": 0.2801453769207001,
"learning_rate": 3.15541601255887e-06,
"loss": 1.0305,
"step": 873
},
{
"epoch": 0.6860282574568289,
"grad_norm": 0.29602035880088806,
"learning_rate": 3.1475667189952907e-06,
"loss": 1.0955,
"step": 874
},
{
"epoch": 0.6868131868131868,
"grad_norm": 0.301297664642334,
"learning_rate": 3.1397174254317113e-06,
"loss": 1.036,
"step": 875
},
{
"epoch": 0.6875981161695447,
"grad_norm": 0.3049217462539673,
"learning_rate": 3.1318681318681323e-06,
"loss": 1.1071,
"step": 876
},
{
"epoch": 0.6883830455259027,
"grad_norm": 0.2837543785572052,
"learning_rate": 3.124018838304553e-06,
"loss": 1.0273,
"step": 877
},
{
"epoch": 0.6891679748822606,
"grad_norm": 0.28585192561149597,
"learning_rate": 3.1161695447409734e-06,
"loss": 1.0464,
"step": 878
},
{
"epoch": 0.6899529042386185,
"grad_norm": 0.2744940519332886,
"learning_rate": 3.108320251177394e-06,
"loss": 1.0626,
"step": 879
},
{
"epoch": 0.6907378335949764,
"grad_norm": 0.29953551292419434,
"learning_rate": 3.100470957613815e-06,
"loss": 1.0375,
"step": 880
},
{
"epoch": 0.6915227629513344,
"grad_norm": 0.30678224563598633,
"learning_rate": 3.092621664050236e-06,
"loss": 1.0765,
"step": 881
},
{
"epoch": 0.6923076923076923,
"grad_norm": 0.29770031571388245,
"learning_rate": 3.084772370486657e-06,
"loss": 1.0751,
"step": 882
},
{
"epoch": 0.6930926216640503,
"grad_norm": 0.2807864546775818,
"learning_rate": 3.0769230769230774e-06,
"loss": 1.0866,
"step": 883
},
{
"epoch": 0.6938775510204082,
"grad_norm": 0.29043442010879517,
"learning_rate": 3.069073783359498e-06,
"loss": 1.0886,
"step": 884
},
{
"epoch": 0.6946624803767661,
"grad_norm": 0.31291401386260986,
"learning_rate": 3.0612244897959185e-06,
"loss": 1.0481,
"step": 885
},
{
"epoch": 0.695447409733124,
"grad_norm": 0.28840577602386475,
"learning_rate": 3.0533751962323395e-06,
"loss": 1.0756,
"step": 886
},
{
"epoch": 0.6962323390894819,
"grad_norm": 0.30978354811668396,
"learning_rate": 3.04552590266876e-06,
"loss": 1.0258,
"step": 887
},
{
"epoch": 0.6970172684458399,
"grad_norm": 0.29402005672454834,
"learning_rate": 3.0376766091051806e-06,
"loss": 1.0499,
"step": 888
},
{
"epoch": 0.6978021978021978,
"grad_norm": 0.29459160566329956,
"learning_rate": 3.029827315541601e-06,
"loss": 1.1056,
"step": 889
},
{
"epoch": 0.6985871271585558,
"grad_norm": 0.3101595938205719,
"learning_rate": 3.021978021978022e-06,
"loss": 1.145,
"step": 890
},
{
"epoch": 0.6993720565149136,
"grad_norm": 0.2799089550971985,
"learning_rate": 3.0141287284144427e-06,
"loss": 1.0958,
"step": 891
},
{
"epoch": 0.7001569858712716,
"grad_norm": 0.29476428031921387,
"learning_rate": 3.006279434850864e-06,
"loss": 1.109,
"step": 892
},
{
"epoch": 0.7009419152276295,
"grad_norm": 0.291610985994339,
"learning_rate": 2.9984301412872847e-06,
"loss": 1.052,
"step": 893
},
{
"epoch": 0.7017268445839875,
"grad_norm": 0.29794690012931824,
"learning_rate": 2.9905808477237053e-06,
"loss": 1.083,
"step": 894
},
{
"epoch": 0.7025117739403454,
"grad_norm": 0.28312987089157104,
"learning_rate": 2.982731554160126e-06,
"loss": 1.0448,
"step": 895
},
{
"epoch": 0.7032967032967034,
"grad_norm": 0.28827813267707825,
"learning_rate": 2.974882260596547e-06,
"loss": 1.0442,
"step": 896
},
{
"epoch": 0.7040816326530612,
"grad_norm": 0.29806190729141235,
"learning_rate": 2.9670329670329673e-06,
"loss": 1.0796,
"step": 897
},
{
"epoch": 0.7048665620094191,
"grad_norm": 0.2928798496723175,
"learning_rate": 2.959183673469388e-06,
"loss": 1.0792,
"step": 898
},
{
"epoch": 0.7056514913657771,
"grad_norm": 0.2975620925426483,
"learning_rate": 2.9513343799058085e-06,
"loss": 1.0481,
"step": 899
},
{
"epoch": 0.706436420722135,
"grad_norm": 0.2881799042224884,
"learning_rate": 2.9434850863422294e-06,
"loss": 1.0588,
"step": 900
},
{
"epoch": 0.707221350078493,
"grad_norm": 0.289421021938324,
"learning_rate": 2.93563579277865e-06,
"loss": 1.1009,
"step": 901
},
{
"epoch": 0.7080062794348508,
"grad_norm": 0.30106186866760254,
"learning_rate": 2.9277864992150706e-06,
"loss": 1.1007,
"step": 902
},
{
"epoch": 0.7087912087912088,
"grad_norm": 0.2982400059700012,
"learning_rate": 2.919937205651492e-06,
"loss": 1.0781,
"step": 903
},
{
"epoch": 0.7095761381475667,
"grad_norm": 0.27340105175971985,
"learning_rate": 2.9120879120879125e-06,
"loss": 1.0401,
"step": 904
},
{
"epoch": 0.7103610675039247,
"grad_norm": 0.2898809313774109,
"learning_rate": 2.904238618524333e-06,
"loss": 1.1035,
"step": 905
},
{
"epoch": 0.7111459968602826,
"grad_norm": 0.28713247179985046,
"learning_rate": 2.896389324960754e-06,
"loss": 1.043,
"step": 906
},
{
"epoch": 0.7119309262166404,
"grad_norm": 0.2877185046672821,
"learning_rate": 2.8885400313971746e-06,
"loss": 1.0845,
"step": 907
},
{
"epoch": 0.7127158555729984,
"grad_norm": 0.2887587249279022,
"learning_rate": 2.880690737833595e-06,
"loss": 1.0751,
"step": 908
},
{
"epoch": 0.7135007849293563,
"grad_norm": 0.2877505123615265,
"learning_rate": 2.8728414442700157e-06,
"loss": 1.0312,
"step": 909
},
{
"epoch": 0.7142857142857143,
"grad_norm": 0.28425735235214233,
"learning_rate": 2.8649921507064367e-06,
"loss": 1.0554,
"step": 910
},
{
"epoch": 0.7150706436420722,
"grad_norm": 0.2878026068210602,
"learning_rate": 2.8571428571428573e-06,
"loss": 1.0041,
"step": 911
},
{
"epoch": 0.7158555729984302,
"grad_norm": 0.4087301194667816,
"learning_rate": 2.849293563579278e-06,
"loss": 1.0523,
"step": 912
},
{
"epoch": 0.716640502354788,
"grad_norm": 0.2949962913990021,
"learning_rate": 2.8414442700156984e-06,
"loss": 1.0276,
"step": 913
},
{
"epoch": 0.717425431711146,
"grad_norm": 0.2939402163028717,
"learning_rate": 2.8335949764521198e-06,
"loss": 1.0428,
"step": 914
},
{
"epoch": 0.7182103610675039,
"grad_norm": 0.27334320545196533,
"learning_rate": 2.8257456828885403e-06,
"loss": 1.0218,
"step": 915
},
{
"epoch": 0.7189952904238619,
"grad_norm": 0.28242912888526917,
"learning_rate": 2.8178963893249613e-06,
"loss": 1.0306,
"step": 916
},
{
"epoch": 0.7197802197802198,
"grad_norm": 0.30455905199050903,
"learning_rate": 2.810047095761382e-06,
"loss": 1.0888,
"step": 917
},
{
"epoch": 0.7205651491365777,
"grad_norm": 0.31174805760383606,
"learning_rate": 2.8021978021978024e-06,
"loss": 1.041,
"step": 918
},
{
"epoch": 0.7213500784929356,
"grad_norm": 0.30026131868362427,
"learning_rate": 2.794348508634223e-06,
"loss": 1.0294,
"step": 919
},
{
"epoch": 0.7221350078492935,
"grad_norm": 0.2858772575855255,
"learning_rate": 2.786499215070644e-06,
"loss": 1.0676,
"step": 920
},
{
"epoch": 0.7229199372056515,
"grad_norm": 0.3418976962566376,
"learning_rate": 2.7786499215070645e-06,
"loss": 1.0551,
"step": 921
},
{
"epoch": 0.7237048665620094,
"grad_norm": 0.31597593426704407,
"learning_rate": 2.770800627943485e-06,
"loss": 1.063,
"step": 922
},
{
"epoch": 0.7244897959183674,
"grad_norm": 0.2929779291152954,
"learning_rate": 2.7629513343799056e-06,
"loss": 1.0147,
"step": 923
},
{
"epoch": 0.7252747252747253,
"grad_norm": 0.2997245192527771,
"learning_rate": 2.7551020408163266e-06,
"loss": 1.1207,
"step": 924
},
{
"epoch": 0.7260596546310832,
"grad_norm": 0.2890755832195282,
"learning_rate": 2.7472527472527476e-06,
"loss": 1.0533,
"step": 925
},
{
"epoch": 0.7268445839874411,
"grad_norm": 0.29468992352485657,
"learning_rate": 2.7394034536891686e-06,
"loss": 1.0764,
"step": 926
},
{
"epoch": 0.7276295133437991,
"grad_norm": 0.28897175192832947,
"learning_rate": 2.731554160125589e-06,
"loss": 1.0755,
"step": 927
},
{
"epoch": 0.728414442700157,
"grad_norm": 0.30992481112480164,
"learning_rate": 2.7237048665620097e-06,
"loss": 1.0074,
"step": 928
},
{
"epoch": 0.7291993720565149,
"grad_norm": 0.29595842957496643,
"learning_rate": 2.7158555729984302e-06,
"loss": 1.1159,
"step": 929
},
{
"epoch": 0.7299843014128728,
"grad_norm": 0.2836659550666809,
"learning_rate": 2.7080062794348512e-06,
"loss": 1.0807,
"step": 930
},
{
"epoch": 0.7307692307692307,
"grad_norm": 0.2799653708934784,
"learning_rate": 2.7001569858712718e-06,
"loss": 1.0111,
"step": 931
},
{
"epoch": 0.7315541601255887,
"grad_norm": 0.29231712222099304,
"learning_rate": 2.6923076923076923e-06,
"loss": 1.039,
"step": 932
},
{
"epoch": 0.7323390894819466,
"grad_norm": 0.2751501202583313,
"learning_rate": 2.684458398744113e-06,
"loss": 1.0743,
"step": 933
},
{
"epoch": 0.7331240188383046,
"grad_norm": 0.2840130031108856,
"learning_rate": 2.676609105180534e-06,
"loss": 1.0533,
"step": 934
},
{
"epoch": 0.7339089481946625,
"grad_norm": 0.28328344225883484,
"learning_rate": 2.668759811616955e-06,
"loss": 1.0726,
"step": 935
},
{
"epoch": 0.7346938775510204,
"grad_norm": 0.2846105694770813,
"learning_rate": 2.660910518053376e-06,
"loss": 1.0482,
"step": 936
},
{
"epoch": 0.7354788069073783,
"grad_norm": 0.2846198081970215,
"learning_rate": 2.6530612244897964e-06,
"loss": 1.0549,
"step": 937
},
{
"epoch": 0.7362637362637363,
"grad_norm": 0.2910875678062439,
"learning_rate": 2.645211930926217e-06,
"loss": 1.0597,
"step": 938
},
{
"epoch": 0.7370486656200942,
"grad_norm": 0.3099419176578522,
"learning_rate": 2.6373626373626375e-06,
"loss": 1.1132,
"step": 939
},
{
"epoch": 0.7378335949764521,
"grad_norm": 0.30356669425964355,
"learning_rate": 2.6295133437990585e-06,
"loss": 1.0268,
"step": 940
},
{
"epoch": 0.7386185243328101,
"grad_norm": 0.28936824202537537,
"learning_rate": 2.621664050235479e-06,
"loss": 1.0237,
"step": 941
},
{
"epoch": 0.7394034536891679,
"grad_norm": 0.2909795641899109,
"learning_rate": 2.6138147566718996e-06,
"loss": 1.0425,
"step": 942
},
{
"epoch": 0.7401883830455259,
"grad_norm": 0.29834648966789246,
"learning_rate": 2.60596546310832e-06,
"loss": 1.0731,
"step": 943
},
{
"epoch": 0.7409733124018838,
"grad_norm": 0.3154754042625427,
"learning_rate": 2.598116169544741e-06,
"loss": 1.144,
"step": 944
},
{
"epoch": 0.7417582417582418,
"grad_norm": 0.2903672456741333,
"learning_rate": 2.5902668759811617e-06,
"loss": 1.0753,
"step": 945
},
{
"epoch": 0.7425431711145997,
"grad_norm": 0.28852578997612,
"learning_rate": 2.582417582417583e-06,
"loss": 1.0292,
"step": 946
},
{
"epoch": 0.7433281004709577,
"grad_norm": 0.2857038080692291,
"learning_rate": 2.5745682888540036e-06,
"loss": 1.0717,
"step": 947
},
{
"epoch": 0.7441130298273155,
"grad_norm": 0.2909829914569855,
"learning_rate": 2.566718995290424e-06,
"loss": 1.0209,
"step": 948
},
{
"epoch": 0.7448979591836735,
"grad_norm": 0.2876448631286621,
"learning_rate": 2.5588697017268448e-06,
"loss": 1.0688,
"step": 949
},
{
"epoch": 0.7456828885400314,
"grad_norm": 0.2869911789894104,
"learning_rate": 2.5510204081632657e-06,
"loss": 1.0332,
"step": 950
},
{
"epoch": 0.7464678178963893,
"grad_norm": 0.2981649935245514,
"learning_rate": 2.5431711145996863e-06,
"loss": 1.0635,
"step": 951
},
{
"epoch": 0.7472527472527473,
"grad_norm": 0.28783732652664185,
"learning_rate": 2.535321821036107e-06,
"loss": 1.0492,
"step": 952
},
{
"epoch": 0.7480376766091051,
"grad_norm": 0.28739696741104126,
"learning_rate": 2.5274725274725274e-06,
"loss": 1.0318,
"step": 953
},
{
"epoch": 0.7488226059654631,
"grad_norm": 0.28677845001220703,
"learning_rate": 2.5196232339089484e-06,
"loss": 1.032,
"step": 954
},
{
"epoch": 0.749607535321821,
"grad_norm": 0.3001886010169983,
"learning_rate": 2.511773940345369e-06,
"loss": 1.0855,
"step": 955
},
{
"epoch": 0.750392464678179,
"grad_norm": 0.2894863486289978,
"learning_rate": 2.5039246467817895e-06,
"loss": 1.044,
"step": 956
},
{
"epoch": 0.7511773940345369,
"grad_norm": 0.29826030135154724,
"learning_rate": 2.4960753532182105e-06,
"loss": 1.0313,
"step": 957
},
{
"epoch": 0.7519623233908949,
"grad_norm": 0.30050304532051086,
"learning_rate": 2.488226059654631e-06,
"loss": 1.0575,
"step": 958
},
{
"epoch": 0.7527472527472527,
"grad_norm": 0.3099324107170105,
"learning_rate": 2.480376766091052e-06,
"loss": 1.0636,
"step": 959
},
{
"epoch": 0.7535321821036107,
"grad_norm": 0.2929956912994385,
"learning_rate": 2.472527472527473e-06,
"loss": 1.0516,
"step": 960
},
{
"epoch": 0.7543171114599686,
"grad_norm": 0.29828134179115295,
"learning_rate": 2.4646781789638936e-06,
"loss": 1.0582,
"step": 961
},
{
"epoch": 0.7551020408163265,
"grad_norm": 0.3314920663833618,
"learning_rate": 2.456828885400314e-06,
"loss": 1.0783,
"step": 962
},
{
"epoch": 0.7558869701726845,
"grad_norm": 0.3030723035335541,
"learning_rate": 2.4489795918367347e-06,
"loss": 1.0155,
"step": 963
},
{
"epoch": 0.7566718995290423,
"grad_norm": 0.28593307733535767,
"learning_rate": 2.4411302982731556e-06,
"loss": 1.0266,
"step": 964
},
{
"epoch": 0.7574568288854003,
"grad_norm": 0.2924596667289734,
"learning_rate": 2.4332810047095766e-06,
"loss": 1.0367,
"step": 965
},
{
"epoch": 0.7582417582417582,
"grad_norm": 0.30590420961380005,
"learning_rate": 2.425431711145997e-06,
"loss": 1.0675,
"step": 966
},
{
"epoch": 0.7590266875981162,
"grad_norm": 0.30233892798423767,
"learning_rate": 2.4175824175824177e-06,
"loss": 1.0837,
"step": 967
},
{
"epoch": 0.7598116169544741,
"grad_norm": 0.32924067974090576,
"learning_rate": 2.4097331240188383e-06,
"loss": 1.109,
"step": 968
},
{
"epoch": 0.7605965463108321,
"grad_norm": 0.29074007272720337,
"learning_rate": 2.4018838304552593e-06,
"loss": 1.0616,
"step": 969
},
{
"epoch": 0.7613814756671899,
"grad_norm": 0.29699182510375977,
"learning_rate": 2.3940345368916803e-06,
"loss": 1.0717,
"step": 970
},
{
"epoch": 0.7621664050235479,
"grad_norm": 0.3066222071647644,
"learning_rate": 2.386185243328101e-06,
"loss": 1.0731,
"step": 971
},
{
"epoch": 0.7629513343799058,
"grad_norm": 0.37514591217041016,
"learning_rate": 2.3783359497645214e-06,
"loss": 1.0188,
"step": 972
},
{
"epoch": 0.7637362637362637,
"grad_norm": 0.33850035071372986,
"learning_rate": 2.370486656200942e-06,
"loss": 1.1529,
"step": 973
},
{
"epoch": 0.7645211930926217,
"grad_norm": 0.2899448275566101,
"learning_rate": 2.362637362637363e-06,
"loss": 1.0658,
"step": 974
},
{
"epoch": 0.7653061224489796,
"grad_norm": 0.3076562285423279,
"learning_rate": 2.3547880690737835e-06,
"loss": 1.028,
"step": 975
},
{
"epoch": 0.7660910518053375,
"grad_norm": 0.3137950599193573,
"learning_rate": 2.3469387755102044e-06,
"loss": 1.0447,
"step": 976
},
{
"epoch": 0.7668759811616954,
"grad_norm": 0.30430495738983154,
"learning_rate": 2.339089481946625e-06,
"loss": 1.105,
"step": 977
},
{
"epoch": 0.7676609105180534,
"grad_norm": 0.38252878189086914,
"learning_rate": 2.3312401883830456e-06,
"loss": 1.1403,
"step": 978
},
{
"epoch": 0.7684458398744113,
"grad_norm": 0.30081915855407715,
"learning_rate": 2.3233908948194665e-06,
"loss": 1.109,
"step": 979
},
{
"epoch": 0.7692307692307693,
"grad_norm": 0.29588279128074646,
"learning_rate": 2.315541601255887e-06,
"loss": 1.0319,
"step": 980
},
{
"epoch": 0.7700156985871272,
"grad_norm": 0.3080218434333801,
"learning_rate": 2.307692307692308e-06,
"loss": 1.039,
"step": 981
},
{
"epoch": 0.7708006279434851,
"grad_norm": 0.2921229302883148,
"learning_rate": 2.2998430141287286e-06,
"loss": 1.0493,
"step": 982
},
{
"epoch": 0.771585557299843,
"grad_norm": 0.2984630763530731,
"learning_rate": 2.291993720565149e-06,
"loss": 1.0191,
"step": 983
},
{
"epoch": 0.7723704866562009,
"grad_norm": 0.28374841809272766,
"learning_rate": 2.28414442700157e-06,
"loss": 1.0103,
"step": 984
},
{
"epoch": 0.7731554160125589,
"grad_norm": 0.3007064163684845,
"learning_rate": 2.2762951334379907e-06,
"loss": 1.0387,
"step": 985
},
{
"epoch": 0.7739403453689168,
"grad_norm": 0.2927864193916321,
"learning_rate": 2.2684458398744113e-06,
"loss": 1.005,
"step": 986
},
{
"epoch": 0.7747252747252747,
"grad_norm": 0.3065125048160553,
"learning_rate": 2.2605965463108323e-06,
"loss": 1.0851,
"step": 987
},
{
"epoch": 0.7755102040816326,
"grad_norm": 0.27849113941192627,
"learning_rate": 2.252747252747253e-06,
"loss": 1.0251,
"step": 988
},
{
"epoch": 0.7762951334379906,
"grad_norm": 0.2948971688747406,
"learning_rate": 2.244897959183674e-06,
"loss": 1.0274,
"step": 989
},
{
"epoch": 0.7770800627943485,
"grad_norm": 0.3202616274356842,
"learning_rate": 2.2370486656200944e-06,
"loss": 1.0485,
"step": 990
},
{
"epoch": 0.7778649921507065,
"grad_norm": 0.3328281342983246,
"learning_rate": 2.229199372056515e-06,
"loss": 1.069,
"step": 991
},
{
"epoch": 0.7786499215070644,
"grad_norm": 0.3161095976829529,
"learning_rate": 2.221350078492936e-06,
"loss": 1.0848,
"step": 992
},
{
"epoch": 0.7794348508634223,
"grad_norm": 0.28824999928474426,
"learning_rate": 2.2135007849293564e-06,
"loss": 1.0654,
"step": 993
},
{
"epoch": 0.7802197802197802,
"grad_norm": 0.3667064309120178,
"learning_rate": 2.2056514913657774e-06,
"loss": 1.0116,
"step": 994
},
{
"epoch": 0.7810047095761381,
"grad_norm": 0.2977278530597687,
"learning_rate": 2.197802197802198e-06,
"loss": 1.0814,
"step": 995
},
{
"epoch": 0.7817896389324961,
"grad_norm": 0.29998522996902466,
"learning_rate": 2.1899529042386185e-06,
"loss": 1.0431,
"step": 996
},
{
"epoch": 0.782574568288854,
"grad_norm": 0.31411993503570557,
"learning_rate": 2.1821036106750395e-06,
"loss": 1.0926,
"step": 997
},
{
"epoch": 0.783359497645212,
"grad_norm": 0.29877665638923645,
"learning_rate": 2.17425431711146e-06,
"loss": 1.0989,
"step": 998
},
{
"epoch": 0.7841444270015698,
"grad_norm": 0.2992810010910034,
"learning_rate": 2.166405023547881e-06,
"loss": 1.0119,
"step": 999
},
{
"epoch": 0.7849293563579278,
"grad_norm": 0.2953478991985321,
"learning_rate": 2.1585557299843016e-06,
"loss": 1.0669,
"step": 1000
},
{
"epoch": 0.7857142857142857,
"grad_norm": 0.29331153631210327,
"learning_rate": 2.150706436420722e-06,
"loss": 1.0554,
"step": 1001
},
{
"epoch": 0.7864992150706437,
"grad_norm": 0.2879624664783478,
"learning_rate": 2.1428571428571427e-06,
"loss": 1.0212,
"step": 1002
},
{
"epoch": 0.7872841444270016,
"grad_norm": 0.2884847819805145,
"learning_rate": 2.1350078492935637e-06,
"loss": 1.0376,
"step": 1003
},
{
"epoch": 0.7880690737833596,
"grad_norm": 0.29227468371391296,
"learning_rate": 2.1271585557299847e-06,
"loss": 1.0444,
"step": 1004
},
{
"epoch": 0.7888540031397174,
"grad_norm": 0.3019685745239258,
"learning_rate": 2.1193092621664052e-06,
"loss": 1.1065,
"step": 1005
},
{
"epoch": 0.7896389324960753,
"grad_norm": 0.287661075592041,
"learning_rate": 2.111459968602826e-06,
"loss": 1.0141,
"step": 1006
},
{
"epoch": 0.7904238618524333,
"grad_norm": 0.29692256450653076,
"learning_rate": 2.1036106750392464e-06,
"loss": 1.0183,
"step": 1007
},
{
"epoch": 0.7912087912087912,
"grad_norm": 0.29249200224876404,
"learning_rate": 2.0957613814756673e-06,
"loss": 1.0084,
"step": 1008
},
{
"epoch": 0.7919937205651492,
"grad_norm": 0.31126755475997925,
"learning_rate": 2.0879120879120883e-06,
"loss": 1.023,
"step": 1009
},
{
"epoch": 0.792778649921507,
"grad_norm": 0.29185745120048523,
"learning_rate": 2.080062794348509e-06,
"loss": 0.9944,
"step": 1010
},
{
"epoch": 0.793563579277865,
"grad_norm": 0.30141139030456543,
"learning_rate": 2.0722135007849294e-06,
"loss": 1.0885,
"step": 1011
},
{
"epoch": 0.7943485086342229,
"grad_norm": 0.29048752784729004,
"learning_rate": 2.06436420722135e-06,
"loss": 1.0231,
"step": 1012
},
{
"epoch": 0.7951334379905809,
"grad_norm": 0.3008350431919098,
"learning_rate": 2.056514913657771e-06,
"loss": 1.0388,
"step": 1013
},
{
"epoch": 0.7959183673469388,
"grad_norm": 0.30450665950775146,
"learning_rate": 2.048665620094192e-06,
"loss": 1.0316,
"step": 1014
},
{
"epoch": 0.7967032967032966,
"grad_norm": 0.34311988949775696,
"learning_rate": 2.0408163265306125e-06,
"loss": 1.1252,
"step": 1015
},
{
"epoch": 0.7974882260596546,
"grad_norm": 0.28808602690696716,
"learning_rate": 2.032967032967033e-06,
"loss": 1.0348,
"step": 1016
},
{
"epoch": 0.7982731554160125,
"grad_norm": 0.28176361322402954,
"learning_rate": 2.0251177394034536e-06,
"loss": 1.0127,
"step": 1017
},
{
"epoch": 0.7990580847723705,
"grad_norm": 0.30243223905563354,
"learning_rate": 2.0172684458398746e-06,
"loss": 1.0655,
"step": 1018
},
{
"epoch": 0.7998430141287284,
"grad_norm": 0.2991596460342407,
"learning_rate": 2.0094191522762956e-06,
"loss": 1.071,
"step": 1019
},
{
"epoch": 0.8006279434850864,
"grad_norm": 0.31719931960105896,
"learning_rate": 2.001569858712716e-06,
"loss": 1.0703,
"step": 1020
},
{
"epoch": 0.8014128728414442,
"grad_norm": 0.28864073753356934,
"learning_rate": 1.9937205651491367e-06,
"loss": 1.0865,
"step": 1021
},
{
"epoch": 0.8021978021978022,
"grad_norm": 0.2995680272579193,
"learning_rate": 1.9858712715855573e-06,
"loss": 1.043,
"step": 1022
},
{
"epoch": 0.8029827315541601,
"grad_norm": 0.30036595463752747,
"learning_rate": 1.9780219780219782e-06,
"loss": 1.0925,
"step": 1023
},
{
"epoch": 0.8037676609105181,
"grad_norm": 0.2845197916030884,
"learning_rate": 1.970172684458399e-06,
"loss": 1.0439,
"step": 1024
},
{
"epoch": 0.804552590266876,
"grad_norm": 0.29325416684150696,
"learning_rate": 1.9623233908948198e-06,
"loss": 1.047,
"step": 1025
},
{
"epoch": 0.8053375196232339,
"grad_norm": 0.2978193163871765,
"learning_rate": 1.9544740973312403e-06,
"loss": 1.0581,
"step": 1026
},
{
"epoch": 0.8061224489795918,
"grad_norm": 0.31198781728744507,
"learning_rate": 1.946624803767661e-06,
"loss": 1.0275,
"step": 1027
},
{
"epoch": 0.8069073783359497,
"grad_norm": 0.28849077224731445,
"learning_rate": 1.938775510204082e-06,
"loss": 1.0534,
"step": 1028
},
{
"epoch": 0.8076923076923077,
"grad_norm": 0.3035949766635895,
"learning_rate": 1.9309262166405024e-06,
"loss": 0.9924,
"step": 1029
},
{
"epoch": 0.8084772370486656,
"grad_norm": 0.330161988735199,
"learning_rate": 1.9230769230769234e-06,
"loss": 1.0625,
"step": 1030
},
{
"epoch": 0.8092621664050236,
"grad_norm": 0.28037184476852417,
"learning_rate": 1.915227629513344e-06,
"loss": 1.0381,
"step": 1031
},
{
"epoch": 0.8100470957613815,
"grad_norm": 0.3421080410480499,
"learning_rate": 1.9073783359497645e-06,
"loss": 1.0885,
"step": 1032
},
{
"epoch": 0.8108320251177394,
"grad_norm": 0.3068152368068695,
"learning_rate": 1.8995290423861853e-06,
"loss": 1.0788,
"step": 1033
},
{
"epoch": 0.8116169544740973,
"grad_norm": 0.2811432182788849,
"learning_rate": 1.891679748822606e-06,
"loss": 1.0359,
"step": 1034
},
{
"epoch": 0.8124018838304553,
"grad_norm": 0.5453617572784424,
"learning_rate": 1.883830455259027e-06,
"loss": 1.0151,
"step": 1035
},
{
"epoch": 0.8131868131868132,
"grad_norm": 0.28949666023254395,
"learning_rate": 1.8759811616954476e-06,
"loss": 1.0557,
"step": 1036
},
{
"epoch": 0.8139717425431711,
"grad_norm": 0.2827453017234802,
"learning_rate": 1.8681318681318684e-06,
"loss": 1.006,
"step": 1037
},
{
"epoch": 0.814756671899529,
"grad_norm": 0.2997809648513794,
"learning_rate": 1.860282574568289e-06,
"loss": 1.025,
"step": 1038
},
{
"epoch": 0.8155416012558869,
"grad_norm": 0.3027696907520294,
"learning_rate": 1.8524332810047097e-06,
"loss": 1.0502,
"step": 1039
},
{
"epoch": 0.8163265306122449,
"grad_norm": 0.3114776909351349,
"learning_rate": 1.8445839874411302e-06,
"loss": 1.1017,
"step": 1040
},
{
"epoch": 0.8171114599686028,
"grad_norm": 0.2964245676994324,
"learning_rate": 1.8367346938775512e-06,
"loss": 1.0149,
"step": 1041
},
{
"epoch": 0.8178963893249608,
"grad_norm": 0.2923440933227539,
"learning_rate": 1.828885400313972e-06,
"loss": 1.073,
"step": 1042
},
{
"epoch": 0.8186813186813187,
"grad_norm": 0.2958196699619293,
"learning_rate": 1.8210361067503925e-06,
"loss": 1.0788,
"step": 1043
},
{
"epoch": 0.8194662480376766,
"grad_norm": 0.32801884412765503,
"learning_rate": 1.8131868131868133e-06,
"loss": 1.0645,
"step": 1044
},
{
"epoch": 0.8202511773940345,
"grad_norm": 0.30840179324150085,
"learning_rate": 1.8053375196232339e-06,
"loss": 1.0397,
"step": 1045
},
{
"epoch": 0.8210361067503925,
"grad_norm": 0.2844547927379608,
"learning_rate": 1.7974882260596548e-06,
"loss": 1.0534,
"step": 1046
},
{
"epoch": 0.8218210361067504,
"grad_norm": 0.31664690375328064,
"learning_rate": 1.7896389324960756e-06,
"loss": 1.0628,
"step": 1047
},
{
"epoch": 0.8226059654631083,
"grad_norm": 0.3169183135032654,
"learning_rate": 1.7817896389324962e-06,
"loss": 1.0954,
"step": 1048
},
{
"epoch": 0.8233908948194663,
"grad_norm": 0.2980157136917114,
"learning_rate": 1.773940345368917e-06,
"loss": 1.0459,
"step": 1049
},
{
"epoch": 0.8241758241758241,
"grad_norm": 0.3251033425331116,
"learning_rate": 1.7660910518053375e-06,
"loss": 1.1277,
"step": 1050
},
{
"epoch": 0.8249607535321821,
"grad_norm": 0.3023360073566437,
"learning_rate": 1.7582417582417585e-06,
"loss": 1.0413,
"step": 1051
},
{
"epoch": 0.82574568288854,
"grad_norm": 0.33668410778045654,
"learning_rate": 1.7503924646781792e-06,
"loss": 1.0721,
"step": 1052
},
{
"epoch": 0.826530612244898,
"grad_norm": 0.30133289098739624,
"learning_rate": 1.7425431711145998e-06,
"loss": 1.0245,
"step": 1053
},
{
"epoch": 0.8273155416012559,
"grad_norm": 0.30766019225120544,
"learning_rate": 1.7346938775510206e-06,
"loss": 1.0795,
"step": 1054
},
{
"epoch": 0.8281004709576139,
"grad_norm": 0.2983943521976471,
"learning_rate": 1.7268445839874411e-06,
"loss": 1.0699,
"step": 1055
},
{
"epoch": 0.8288854003139717,
"grad_norm": 0.3063719570636749,
"learning_rate": 1.7189952904238619e-06,
"loss": 1.0624,
"step": 1056
},
{
"epoch": 0.8296703296703297,
"grad_norm": 0.3908691704273224,
"learning_rate": 1.7111459968602829e-06,
"loss": 1.0461,
"step": 1057
},
{
"epoch": 0.8304552590266876,
"grad_norm": 0.30761247873306274,
"learning_rate": 1.7032967032967034e-06,
"loss": 1.0858,
"step": 1058
},
{
"epoch": 0.8312401883830455,
"grad_norm": 0.2950478792190552,
"learning_rate": 1.6954474097331242e-06,
"loss": 1.0143,
"step": 1059
},
{
"epoch": 0.8320251177394035,
"grad_norm": 0.30142104625701904,
"learning_rate": 1.6875981161695448e-06,
"loss": 1.0693,
"step": 1060
},
{
"epoch": 0.8328100470957613,
"grad_norm": 0.30439862608909607,
"learning_rate": 1.6797488226059655e-06,
"loss": 1.0668,
"step": 1061
},
{
"epoch": 0.8335949764521193,
"grad_norm": 0.2978014349937439,
"learning_rate": 1.6718995290423865e-06,
"loss": 1.0782,
"step": 1062
},
{
"epoch": 0.8343799058084772,
"grad_norm": 0.28805792331695557,
"learning_rate": 1.664050235478807e-06,
"loss": 1.0318,
"step": 1063
},
{
"epoch": 0.8351648351648352,
"grad_norm": 0.29518917202949524,
"learning_rate": 1.6562009419152278e-06,
"loss": 1.0475,
"step": 1064
},
{
"epoch": 0.8359497645211931,
"grad_norm": 0.2990979552268982,
"learning_rate": 1.6483516483516484e-06,
"loss": 1.0352,
"step": 1065
},
{
"epoch": 0.8367346938775511,
"grad_norm": 0.3014602065086365,
"learning_rate": 1.6405023547880692e-06,
"loss": 1.0568,
"step": 1066
},
{
"epoch": 0.8375196232339089,
"grad_norm": 0.2860758602619171,
"learning_rate": 1.6326530612244897e-06,
"loss": 1.0387,
"step": 1067
},
{
"epoch": 0.8383045525902669,
"grad_norm": 0.293454647064209,
"learning_rate": 1.6248037676609107e-06,
"loss": 1.0444,
"step": 1068
},
{
"epoch": 0.8390894819466248,
"grad_norm": 0.2916511297225952,
"learning_rate": 1.6169544740973315e-06,
"loss": 1.0512,
"step": 1069
},
{
"epoch": 0.8398744113029827,
"grad_norm": 0.30183103680610657,
"learning_rate": 1.609105180533752e-06,
"loss": 1.0257,
"step": 1070
},
{
"epoch": 0.8406593406593407,
"grad_norm": 0.3018069267272949,
"learning_rate": 1.6012558869701728e-06,
"loss": 1.0665,
"step": 1071
},
{
"epoch": 0.8414442700156985,
"grad_norm": 0.27915433049201965,
"learning_rate": 1.5934065934065933e-06,
"loss": 1.0397,
"step": 1072
},
{
"epoch": 0.8422291993720565,
"grad_norm": 0.3076684772968292,
"learning_rate": 1.5855572998430143e-06,
"loss": 1.0981,
"step": 1073
},
{
"epoch": 0.8430141287284144,
"grad_norm": 0.29367414116859436,
"learning_rate": 1.577708006279435e-06,
"loss": 1.0547,
"step": 1074
},
{
"epoch": 0.8437990580847724,
"grad_norm": 0.2984970211982727,
"learning_rate": 1.5698587127158556e-06,
"loss": 1.0494,
"step": 1075
},
{
"epoch": 0.8445839874411303,
"grad_norm": 0.29925835132598877,
"learning_rate": 1.5620094191522764e-06,
"loss": 1.0686,
"step": 1076
},
{
"epoch": 0.8453689167974883,
"grad_norm": 0.2967824935913086,
"learning_rate": 1.554160125588697e-06,
"loss": 1.1013,
"step": 1077
},
{
"epoch": 0.8461538461538461,
"grad_norm": 0.30394864082336426,
"learning_rate": 1.546310832025118e-06,
"loss": 1.0908,
"step": 1078
},
{
"epoch": 0.8469387755102041,
"grad_norm": 0.3596284091472626,
"learning_rate": 1.5384615384615387e-06,
"loss": 1.0496,
"step": 1079
},
{
"epoch": 0.847723704866562,
"grad_norm": 0.30267584323883057,
"learning_rate": 1.5306122448979593e-06,
"loss": 1.063,
"step": 1080
},
{
"epoch": 0.8485086342229199,
"grad_norm": 0.2946220934391022,
"learning_rate": 1.52276295133438e-06,
"loss": 1.056,
"step": 1081
},
{
"epoch": 0.8492935635792779,
"grad_norm": 0.2994774281978607,
"learning_rate": 1.5149136577708006e-06,
"loss": 1.055,
"step": 1082
},
{
"epoch": 0.8500784929356358,
"grad_norm": 0.2964215576648712,
"learning_rate": 1.5070643642072214e-06,
"loss": 1.0636,
"step": 1083
},
{
"epoch": 0.8508634222919937,
"grad_norm": 0.313342422246933,
"learning_rate": 1.4992150706436423e-06,
"loss": 1.0341,
"step": 1084
},
{
"epoch": 0.8516483516483516,
"grad_norm": 0.30197572708129883,
"learning_rate": 1.491365777080063e-06,
"loss": 1.0172,
"step": 1085
},
{
"epoch": 0.8524332810047096,
"grad_norm": 0.3112085461616516,
"learning_rate": 1.4835164835164837e-06,
"loss": 1.0727,
"step": 1086
},
{
"epoch": 0.8532182103610675,
"grad_norm": 0.3003901243209839,
"learning_rate": 1.4756671899529042e-06,
"loss": 1.0527,
"step": 1087
},
{
"epoch": 0.8540031397174255,
"grad_norm": 0.2971203327178955,
"learning_rate": 1.467817896389325e-06,
"loss": 1.041,
"step": 1088
},
{
"epoch": 0.8547880690737834,
"grad_norm": 0.3106226921081543,
"learning_rate": 1.459968602825746e-06,
"loss": 1.1165,
"step": 1089
},
{
"epoch": 0.8555729984301413,
"grad_norm": 0.31148043274879456,
"learning_rate": 1.4521193092621665e-06,
"loss": 1.0847,
"step": 1090
},
{
"epoch": 0.8563579277864992,
"grad_norm": 0.29365935921669006,
"learning_rate": 1.4442700156985873e-06,
"loss": 1.013,
"step": 1091
},
{
"epoch": 0.8571428571428571,
"grad_norm": 0.30795639753341675,
"learning_rate": 1.4364207221350079e-06,
"loss": 1.0386,
"step": 1092
},
{
"epoch": 0.8579277864992151,
"grad_norm": 0.2929840087890625,
"learning_rate": 1.4285714285714286e-06,
"loss": 1.036,
"step": 1093
},
{
"epoch": 0.858712715855573,
"grad_norm": 0.31012848019599915,
"learning_rate": 1.4207221350078492e-06,
"loss": 1.0885,
"step": 1094
},
{
"epoch": 0.859497645211931,
"grad_norm": 0.28400346636772156,
"learning_rate": 1.4128728414442702e-06,
"loss": 1.0615,
"step": 1095
},
{
"epoch": 0.8602825745682888,
"grad_norm": 0.28853991627693176,
"learning_rate": 1.405023547880691e-06,
"loss": 1.0612,
"step": 1096
},
{
"epoch": 0.8610675039246468,
"grad_norm": 0.28974905610084534,
"learning_rate": 1.3971742543171115e-06,
"loss": 1.0506,
"step": 1097
},
{
"epoch": 0.8618524332810047,
"grad_norm": 0.3157826066017151,
"learning_rate": 1.3893249607535323e-06,
"loss": 1.0691,
"step": 1098
},
{
"epoch": 0.8626373626373627,
"grad_norm": 0.28993314504623413,
"learning_rate": 1.3814756671899528e-06,
"loss": 1.0108,
"step": 1099
},
{
"epoch": 0.8634222919937206,
"grad_norm": 0.282035231590271,
"learning_rate": 1.3736263736263738e-06,
"loss": 1.0208,
"step": 1100
},
{
"epoch": 0.8642072213500785,
"grad_norm": 0.30081960558891296,
"learning_rate": 1.3657770800627946e-06,
"loss": 1.0423,
"step": 1101
},
{
"epoch": 0.8649921507064364,
"grad_norm": 0.3326198160648346,
"learning_rate": 1.3579277864992151e-06,
"loss": 1.0326,
"step": 1102
},
{
"epoch": 0.8657770800627943,
"grad_norm": 0.2900926470756531,
"learning_rate": 1.3500784929356359e-06,
"loss": 1.0525,
"step": 1103
},
{
"epoch": 0.8665620094191523,
"grad_norm": 0.28752028942108154,
"learning_rate": 1.3422291993720564e-06,
"loss": 1.0074,
"step": 1104
},
{
"epoch": 0.8673469387755102,
"grad_norm": 0.2825300395488739,
"learning_rate": 1.3343799058084774e-06,
"loss": 1.0223,
"step": 1105
},
{
"epoch": 0.8681318681318682,
"grad_norm": 0.30561885237693787,
"learning_rate": 1.3265306122448982e-06,
"loss": 1.0464,
"step": 1106
},
{
"epoch": 0.868916797488226,
"grad_norm": 0.28518933057785034,
"learning_rate": 1.3186813186813187e-06,
"loss": 1.0464,
"step": 1107
},
{
"epoch": 0.869701726844584,
"grad_norm": 0.2933896780014038,
"learning_rate": 1.3108320251177395e-06,
"loss": 1.0177,
"step": 1108
},
{
"epoch": 0.8704866562009419,
"grad_norm": 0.30531638860702515,
"learning_rate": 1.30298273155416e-06,
"loss": 0.9894,
"step": 1109
},
{
"epoch": 0.8712715855572999,
"grad_norm": 0.2906123995780945,
"learning_rate": 1.2951334379905808e-06,
"loss": 1.0403,
"step": 1110
},
{
"epoch": 0.8720565149136578,
"grad_norm": 0.29348504543304443,
"learning_rate": 1.2872841444270018e-06,
"loss": 1.0439,
"step": 1111
},
{
"epoch": 0.8728414442700158,
"grad_norm": 0.3133821189403534,
"learning_rate": 1.2794348508634224e-06,
"loss": 1.0984,
"step": 1112
},
{
"epoch": 0.8736263736263736,
"grad_norm": 0.2935754358768463,
"learning_rate": 1.2715855572998431e-06,
"loss": 1.0538,
"step": 1113
},
{
"epoch": 0.8744113029827315,
"grad_norm": 0.484567791223526,
"learning_rate": 1.2637362637362637e-06,
"loss": 0.9939,
"step": 1114
},
{
"epoch": 0.8751962323390895,
"grad_norm": 0.2972055673599243,
"learning_rate": 1.2558869701726845e-06,
"loss": 1.0524,
"step": 1115
},
{
"epoch": 0.8759811616954474,
"grad_norm": 0.3031924366950989,
"learning_rate": 1.2480376766091052e-06,
"loss": 1.0597,
"step": 1116
},
{
"epoch": 0.8767660910518054,
"grad_norm": 0.2977665662765503,
"learning_rate": 1.240188383045526e-06,
"loss": 1.0796,
"step": 1117
},
{
"epoch": 0.8775510204081632,
"grad_norm": 0.3066113591194153,
"learning_rate": 1.2323390894819468e-06,
"loss": 1.0741,
"step": 1118
},
{
"epoch": 0.8783359497645212,
"grad_norm": 0.30936139822006226,
"learning_rate": 1.2244897959183673e-06,
"loss": 1.0501,
"step": 1119
},
{
"epoch": 0.8791208791208791,
"grad_norm": 0.32315778732299805,
"learning_rate": 1.2166405023547883e-06,
"loss": 1.0674,
"step": 1120
},
{
"epoch": 0.8799058084772371,
"grad_norm": 0.3213968276977539,
"learning_rate": 1.2087912087912089e-06,
"loss": 1.079,
"step": 1121
},
{
"epoch": 0.880690737833595,
"grad_norm": 0.30559977889060974,
"learning_rate": 1.2009419152276296e-06,
"loss": 1.0145,
"step": 1122
},
{
"epoch": 0.8814756671899528,
"grad_norm": 0.32773932814598083,
"learning_rate": 1.1930926216640504e-06,
"loss": 1.0227,
"step": 1123
},
{
"epoch": 0.8822605965463108,
"grad_norm": 0.2936771810054779,
"learning_rate": 1.185243328100471e-06,
"loss": 1.0222,
"step": 1124
},
{
"epoch": 0.8830455259026687,
"grad_norm": 0.3083963096141815,
"learning_rate": 1.1773940345368917e-06,
"loss": 1.0424,
"step": 1125
},
{
"epoch": 0.8838304552590267,
"grad_norm": 0.3056409955024719,
"learning_rate": 1.1695447409733125e-06,
"loss": 1.087,
"step": 1126
},
{
"epoch": 0.8846153846153846,
"grad_norm": 0.2972738444805145,
"learning_rate": 1.1616954474097333e-06,
"loss": 1.034,
"step": 1127
},
{
"epoch": 0.8854003139717426,
"grad_norm": 0.28523531556129456,
"learning_rate": 1.153846153846154e-06,
"loss": 1.0371,
"step": 1128
},
{
"epoch": 0.8861852433281004,
"grad_norm": 0.3151058852672577,
"learning_rate": 1.1459968602825746e-06,
"loss": 1.0826,
"step": 1129
},
{
"epoch": 0.8869701726844584,
"grad_norm": 0.2846231460571289,
"learning_rate": 1.1381475667189954e-06,
"loss": 1.0419,
"step": 1130
},
{
"epoch": 0.8877551020408163,
"grad_norm": 0.3189791738986969,
"learning_rate": 1.1302982731554161e-06,
"loss": 1.0161,
"step": 1131
},
{
"epoch": 0.8885400313971743,
"grad_norm": 0.29699793457984924,
"learning_rate": 1.122448979591837e-06,
"loss": 1.0944,
"step": 1132
},
{
"epoch": 0.8893249607535322,
"grad_norm": 0.31134846806526184,
"learning_rate": 1.1145996860282575e-06,
"loss": 1.0405,
"step": 1133
},
{
"epoch": 0.8901098901098901,
"grad_norm": 0.3218204975128174,
"learning_rate": 1.1067503924646782e-06,
"loss": 1.0721,
"step": 1134
},
{
"epoch": 0.890894819466248,
"grad_norm": 0.2882716655731201,
"learning_rate": 1.098901098901099e-06,
"loss": 1.0527,
"step": 1135
},
{
"epoch": 0.8916797488226059,
"grad_norm": 0.2993222177028656,
"learning_rate": 1.0910518053375198e-06,
"loss": 1.0707,
"step": 1136
},
{
"epoch": 0.8924646781789639,
"grad_norm": 0.29466983675956726,
"learning_rate": 1.0832025117739405e-06,
"loss": 1.0645,
"step": 1137
},
{
"epoch": 0.8932496075353218,
"grad_norm": 0.29492950439453125,
"learning_rate": 1.075353218210361e-06,
"loss": 1.0756,
"step": 1138
},
{
"epoch": 0.8940345368916798,
"grad_norm": 0.2911151647567749,
"learning_rate": 1.0675039246467819e-06,
"loss": 1.0676,
"step": 1139
},
{
"epoch": 0.8948194662480377,
"grad_norm": 0.3016468286514282,
"learning_rate": 1.0596546310832026e-06,
"loss": 1.0752,
"step": 1140
},
{
"epoch": 0.8956043956043956,
"grad_norm": 0.2953685522079468,
"learning_rate": 1.0518053375196232e-06,
"loss": 1.001,
"step": 1141
},
{
"epoch": 0.8963893249607535,
"grad_norm": 0.2959998548030853,
"learning_rate": 1.0439560439560442e-06,
"loss": 1.0695,
"step": 1142
},
{
"epoch": 0.8971742543171115,
"grad_norm": 0.29669925570487976,
"learning_rate": 1.0361067503924647e-06,
"loss": 1.0214,
"step": 1143
},
{
"epoch": 0.8979591836734694,
"grad_norm": 0.28717902302742004,
"learning_rate": 1.0282574568288855e-06,
"loss": 1.0687,
"step": 1144
},
{
"epoch": 0.8987441130298273,
"grad_norm": 0.29382869601249695,
"learning_rate": 1.0204081632653063e-06,
"loss": 1.0213,
"step": 1145
},
{
"epoch": 0.8995290423861853,
"grad_norm": 0.2865571081638336,
"learning_rate": 1.0125588697017268e-06,
"loss": 1.0308,
"step": 1146
},
{
"epoch": 0.9003139717425431,
"grad_norm": 0.3298538029193878,
"learning_rate": 1.0047095761381478e-06,
"loss": 1.0949,
"step": 1147
},
{
"epoch": 0.9010989010989011,
"grad_norm": 0.2812543511390686,
"learning_rate": 9.968602825745683e-07,
"loss": 1.0392,
"step": 1148
},
{
"epoch": 0.901883830455259,
"grad_norm": 0.29315754771232605,
"learning_rate": 9.890109890109891e-07,
"loss": 1.0201,
"step": 1149
},
{
"epoch": 0.902668759811617,
"grad_norm": 0.3410029411315918,
"learning_rate": 9.811616954474099e-07,
"loss": 1.1048,
"step": 1150
},
{
"epoch": 0.9034536891679749,
"grad_norm": 0.2857743203639984,
"learning_rate": 9.733124018838304e-07,
"loss": 1.0611,
"step": 1151
},
{
"epoch": 0.9042386185243328,
"grad_norm": 0.29381293058395386,
"learning_rate": 9.654631083202512e-07,
"loss": 1.0146,
"step": 1152
},
{
"epoch": 0.9050235478806907,
"grad_norm": 0.29993733763694763,
"learning_rate": 9.57613814756672e-07,
"loss": 1.0831,
"step": 1153
},
{
"epoch": 0.9058084772370487,
"grad_norm": 0.2960602343082428,
"learning_rate": 9.497645211930926e-07,
"loss": 1.0313,
"step": 1154
},
{
"epoch": 0.9065934065934066,
"grad_norm": 0.3170572817325592,
"learning_rate": 9.419152276295135e-07,
"loss": 1.0092,
"step": 1155
},
{
"epoch": 0.9073783359497645,
"grad_norm": 0.4631412625312805,
"learning_rate": 9.340659340659342e-07,
"loss": 1.0447,
"step": 1156
},
{
"epoch": 0.9081632653061225,
"grad_norm": 0.2946299910545349,
"learning_rate": 9.262166405023548e-07,
"loss": 1.0705,
"step": 1157
},
{
"epoch": 0.9089481946624803,
"grad_norm": 0.30375024676322937,
"learning_rate": 9.183673469387756e-07,
"loss": 1.0704,
"step": 1158
},
{
"epoch": 0.9097331240188383,
"grad_norm": 0.2881094515323639,
"learning_rate": 9.105180533751963e-07,
"loss": 1.0145,
"step": 1159
},
{
"epoch": 0.9105180533751962,
"grad_norm": 0.29087066650390625,
"learning_rate": 9.026687598116169e-07,
"loss": 1.0452,
"step": 1160
},
{
"epoch": 0.9113029827315542,
"grad_norm": 0.29212790727615356,
"learning_rate": 8.948194662480378e-07,
"loss": 1.0573,
"step": 1161
},
{
"epoch": 0.9120879120879121,
"grad_norm": 0.2992939352989197,
"learning_rate": 8.869701726844585e-07,
"loss": 1.0425,
"step": 1162
},
{
"epoch": 0.9128728414442701,
"grad_norm": 0.29093456268310547,
"learning_rate": 8.791208791208792e-07,
"loss": 1.0422,
"step": 1163
},
{
"epoch": 0.9136577708006279,
"grad_norm": 0.2929815351963043,
"learning_rate": 8.712715855572999e-07,
"loss": 1.0586,
"step": 1164
},
{
"epoch": 0.9144427001569859,
"grad_norm": 0.36663711071014404,
"learning_rate": 8.634222919937206e-07,
"loss": 1.0573,
"step": 1165
},
{
"epoch": 0.9152276295133438,
"grad_norm": 0.3045317232608795,
"learning_rate": 8.555729984301414e-07,
"loss": 1.1,
"step": 1166
},
{
"epoch": 0.9160125588697017,
"grad_norm": 0.30183184146881104,
"learning_rate": 8.477237048665621e-07,
"loss": 1.036,
"step": 1167
},
{
"epoch": 0.9167974882260597,
"grad_norm": 0.3052210509777069,
"learning_rate": 8.398744113029828e-07,
"loss": 1.0897,
"step": 1168
},
{
"epoch": 0.9175824175824175,
"grad_norm": 0.3007582724094391,
"learning_rate": 8.320251177394035e-07,
"loss": 1.0259,
"step": 1169
},
{
"epoch": 0.9183673469387755,
"grad_norm": 0.3091794550418854,
"learning_rate": 8.241758241758242e-07,
"loss": 1.0354,
"step": 1170
},
{
"epoch": 0.9191522762951334,
"grad_norm": 0.30170249938964844,
"learning_rate": 8.163265306122449e-07,
"loss": 1.025,
"step": 1171
},
{
"epoch": 0.9199372056514914,
"grad_norm": 0.5053988695144653,
"learning_rate": 8.084772370486657e-07,
"loss": 1.087,
"step": 1172
},
{
"epoch": 0.9207221350078493,
"grad_norm": 0.3013533055782318,
"learning_rate": 8.006279434850864e-07,
"loss": 1.0278,
"step": 1173
},
{
"epoch": 0.9215070643642073,
"grad_norm": 0.3028901517391205,
"learning_rate": 7.927786499215072e-07,
"loss": 1.0686,
"step": 1174
},
{
"epoch": 0.9222919937205651,
"grad_norm": 0.28716418147087097,
"learning_rate": 7.849293563579278e-07,
"loss": 1.0335,
"step": 1175
},
{
"epoch": 0.9230769230769231,
"grad_norm": 0.3054925501346588,
"learning_rate": 7.770800627943485e-07,
"loss": 1.0697,
"step": 1176
},
{
"epoch": 0.923861852433281,
"grad_norm": 0.3063417673110962,
"learning_rate": 7.692307692307694e-07,
"loss": 1.061,
"step": 1177
},
{
"epoch": 0.9246467817896389,
"grad_norm": 0.28995341062545776,
"learning_rate": 7.6138147566719e-07,
"loss": 1.0103,
"step": 1178
},
{
"epoch": 0.9254317111459969,
"grad_norm": 0.2932472825050354,
"learning_rate": 7.535321821036107e-07,
"loss": 1.0164,
"step": 1179
},
{
"epoch": 0.9262166405023547,
"grad_norm": 0.3210296034812927,
"learning_rate": 7.456828885400315e-07,
"loss": 1.118,
"step": 1180
},
{
"epoch": 0.9270015698587127,
"grad_norm": 0.30883345007896423,
"learning_rate": 7.378335949764521e-07,
"loss": 1.0462,
"step": 1181
},
{
"epoch": 0.9277864992150706,
"grad_norm": 0.2974553406238556,
"learning_rate": 7.29984301412873e-07,
"loss": 1.0685,
"step": 1182
},
{
"epoch": 0.9285714285714286,
"grad_norm": 0.2933749258518219,
"learning_rate": 7.221350078492937e-07,
"loss": 1.0296,
"step": 1183
},
{
"epoch": 0.9293563579277865,
"grad_norm": 0.2952398657798767,
"learning_rate": 7.142857142857143e-07,
"loss": 1.015,
"step": 1184
},
{
"epoch": 0.9301412872841445,
"grad_norm": 0.3025822043418884,
"learning_rate": 7.064364207221351e-07,
"loss": 1.0847,
"step": 1185
},
{
"epoch": 0.9309262166405023,
"grad_norm": 0.2824312448501587,
"learning_rate": 6.985871271585557e-07,
"loss": 0.9973,
"step": 1186
},
{
"epoch": 0.9317111459968603,
"grad_norm": 0.29934030771255493,
"learning_rate": 6.907378335949764e-07,
"loss": 1.0566,
"step": 1187
},
{
"epoch": 0.9324960753532182,
"grad_norm": 0.29235753417015076,
"learning_rate": 6.828885400313973e-07,
"loss": 0.9836,
"step": 1188
},
{
"epoch": 0.9332810047095761,
"grad_norm": 0.3037624955177307,
"learning_rate": 6.750392464678179e-07,
"loss": 1.0722,
"step": 1189
},
{
"epoch": 0.9340659340659341,
"grad_norm": 0.2828160524368286,
"learning_rate": 6.671899529042387e-07,
"loss": 1.0393,
"step": 1190
},
{
"epoch": 0.934850863422292,
"grad_norm": 0.28773877024650574,
"learning_rate": 6.593406593406594e-07,
"loss": 1.0417,
"step": 1191
},
{
"epoch": 0.9356357927786499,
"grad_norm": 0.2887474596500397,
"learning_rate": 6.5149136577708e-07,
"loss": 1.0321,
"step": 1192
},
{
"epoch": 0.9364207221350078,
"grad_norm": 0.2963563799858093,
"learning_rate": 6.436420722135009e-07,
"loss": 1.0139,
"step": 1193
},
{
"epoch": 0.9372056514913658,
"grad_norm": 0.2964331805706024,
"learning_rate": 6.357927786499216e-07,
"loss": 1.0662,
"step": 1194
},
{
"epoch": 0.9379905808477237,
"grad_norm": 0.29119017720222473,
"learning_rate": 6.279434850863422e-07,
"loss": 1.0797,
"step": 1195
},
{
"epoch": 0.9387755102040817,
"grad_norm": 0.32927101850509644,
"learning_rate": 6.20094191522763e-07,
"loss": 1.0963,
"step": 1196
},
{
"epoch": 0.9395604395604396,
"grad_norm": 0.2921772003173828,
"learning_rate": 6.122448979591837e-07,
"loss": 1.0088,
"step": 1197
},
{
"epoch": 0.9403453689167975,
"grad_norm": 0.28854402899742126,
"learning_rate": 6.043956043956044e-07,
"loss": 1.0501,
"step": 1198
},
{
"epoch": 0.9411302982731554,
"grad_norm": 0.30962881445884705,
"learning_rate": 5.965463108320252e-07,
"loss": 1.1162,
"step": 1199
},
{
"epoch": 0.9419152276295133,
"grad_norm": 0.2930225729942322,
"learning_rate": 5.886970172684459e-07,
"loss": 1.0008,
"step": 1200
},
{
"epoch": 0.9427001569858713,
"grad_norm": 0.3310118317604065,
"learning_rate": 5.808477237048666e-07,
"loss": 1.0714,
"step": 1201
},
{
"epoch": 0.9434850863422292,
"grad_norm": 0.300510048866272,
"learning_rate": 5.729984301412873e-07,
"loss": 1.054,
"step": 1202
},
{
"epoch": 0.9442700156985872,
"grad_norm": 0.3338243067264557,
"learning_rate": 5.651491365777081e-07,
"loss": 1.0849,
"step": 1203
},
{
"epoch": 0.945054945054945,
"grad_norm": 0.29972943663597107,
"learning_rate": 5.572998430141287e-07,
"loss": 1.0148,
"step": 1204
},
{
"epoch": 0.945839874411303,
"grad_norm": 0.29417991638183594,
"learning_rate": 5.494505494505495e-07,
"loss": 1.0307,
"step": 1205
},
{
"epoch": 0.9466248037676609,
"grad_norm": 0.2904272675514221,
"learning_rate": 5.416012558869703e-07,
"loss": 1.0504,
"step": 1206
},
{
"epoch": 0.9474097331240189,
"grad_norm": 0.37245574593544006,
"learning_rate": 5.337519623233909e-07,
"loss": 1.077,
"step": 1207
},
{
"epoch": 0.9481946624803768,
"grad_norm": 0.3075472414493561,
"learning_rate": 5.259026687598116e-07,
"loss": 1.0483,
"step": 1208
},
{
"epoch": 0.9489795918367347,
"grad_norm": 0.29694482684135437,
"learning_rate": 5.180533751962324e-07,
"loss": 1.0239,
"step": 1209
},
{
"epoch": 0.9497645211930926,
"grad_norm": 0.3054351806640625,
"learning_rate": 5.102040816326531e-07,
"loss": 1.0446,
"step": 1210
},
{
"epoch": 0.9505494505494505,
"grad_norm": 0.30204030871391296,
"learning_rate": 5.023547880690739e-07,
"loss": 1.0195,
"step": 1211
},
{
"epoch": 0.9513343799058085,
"grad_norm": 0.29818445444107056,
"learning_rate": 4.945054945054946e-07,
"loss": 1.0322,
"step": 1212
},
{
"epoch": 0.9521193092621664,
"grad_norm": 0.4030686914920807,
"learning_rate": 4.866562009419152e-07,
"loss": 0.9908,
"step": 1213
},
{
"epoch": 0.9529042386185244,
"grad_norm": 0.2839055061340332,
"learning_rate": 4.78806907378336e-07,
"loss": 1.0366,
"step": 1214
},
{
"epoch": 0.9536891679748822,
"grad_norm": 0.29423198103904724,
"learning_rate": 4.7095761381475676e-07,
"loss": 1.038,
"step": 1215
},
{
"epoch": 0.9544740973312402,
"grad_norm": 0.30914145708084106,
"learning_rate": 4.631083202511774e-07,
"loss": 1.1039,
"step": 1216
},
{
"epoch": 0.9552590266875981,
"grad_norm": 0.3080761730670929,
"learning_rate": 4.5525902668759813e-07,
"loss": 1.1072,
"step": 1217
},
{
"epoch": 0.9560439560439561,
"grad_norm": 0.3054615259170532,
"learning_rate": 4.474097331240189e-07,
"loss": 1.0865,
"step": 1218
},
{
"epoch": 0.956828885400314,
"grad_norm": 0.3009425103664398,
"learning_rate": 4.395604395604396e-07,
"loss": 1.0368,
"step": 1219
},
{
"epoch": 0.957613814756672,
"grad_norm": 0.28634223341941833,
"learning_rate": 4.317111459968603e-07,
"loss": 1.0526,
"step": 1220
},
{
"epoch": 0.9583987441130298,
"grad_norm": 0.2902422547340393,
"learning_rate": 4.2386185243328105e-07,
"loss": 1.0402,
"step": 1221
},
{
"epoch": 0.9591836734693877,
"grad_norm": 0.31759124994277954,
"learning_rate": 4.1601255886970176e-07,
"loss": 1.0519,
"step": 1222
},
{
"epoch": 0.9599686028257457,
"grad_norm": 0.4426363408565521,
"learning_rate": 4.0816326530612243e-07,
"loss": 1.017,
"step": 1223
},
{
"epoch": 0.9607535321821036,
"grad_norm": 0.3140206038951874,
"learning_rate": 4.003139717425432e-07,
"loss": 1.0142,
"step": 1224
},
{
"epoch": 0.9615384615384616,
"grad_norm": 0.29569703340530396,
"learning_rate": 3.924646781789639e-07,
"loss": 1.0528,
"step": 1225
},
{
"epoch": 0.9623233908948194,
"grad_norm": 0.30597856640815735,
"learning_rate": 3.846153846153847e-07,
"loss": 1.0253,
"step": 1226
},
{
"epoch": 0.9631083202511774,
"grad_norm": 0.3235307037830353,
"learning_rate": 3.7676609105180534e-07,
"loss": 1.1179,
"step": 1227
},
{
"epoch": 0.9638932496075353,
"grad_norm": 0.2932690680027008,
"learning_rate": 3.6891679748822606e-07,
"loss": 1.0423,
"step": 1228
},
{
"epoch": 0.9646781789638933,
"grad_norm": 0.3032452166080475,
"learning_rate": 3.610675039246468e-07,
"loss": 1.0115,
"step": 1229
},
{
"epoch": 0.9654631083202512,
"grad_norm": 0.3229339122772217,
"learning_rate": 3.5321821036106754e-07,
"loss": 1.0766,
"step": 1230
},
{
"epoch": 0.966248037676609,
"grad_norm": 0.3844963312149048,
"learning_rate": 3.453689167974882e-07,
"loss": 1.1032,
"step": 1231
},
{
"epoch": 0.967032967032967,
"grad_norm": 0.2927655279636383,
"learning_rate": 3.3751962323390897e-07,
"loss": 1.052,
"step": 1232
},
{
"epoch": 0.9678178963893249,
"grad_norm": 0.3003545105457306,
"learning_rate": 3.296703296703297e-07,
"loss": 1.0825,
"step": 1233
},
{
"epoch": 0.9686028257456829,
"grad_norm": 0.32615581154823303,
"learning_rate": 3.2182103610675046e-07,
"loss": 1.0915,
"step": 1234
},
{
"epoch": 0.9693877551020408,
"grad_norm": 0.31106311082839966,
"learning_rate": 3.139717425431711e-07,
"loss": 1.0891,
"step": 1235
},
{
"epoch": 0.9701726844583988,
"grad_norm": 0.29504525661468506,
"learning_rate": 3.0612244897959183e-07,
"loss": 1.0303,
"step": 1236
},
{
"epoch": 0.9709576138147566,
"grad_norm": 0.3173236548900604,
"learning_rate": 2.982731554160126e-07,
"loss": 1.0699,
"step": 1237
},
{
"epoch": 0.9717425431711146,
"grad_norm": 0.2948251962661743,
"learning_rate": 2.904238618524333e-07,
"loss": 1.0273,
"step": 1238
},
{
"epoch": 0.9725274725274725,
"grad_norm": 0.3042560815811157,
"learning_rate": 2.8257456828885403e-07,
"loss": 1.019,
"step": 1239
},
{
"epoch": 0.9733124018838305,
"grad_norm": 0.3046058118343353,
"learning_rate": 2.7472527472527475e-07,
"loss": 1.0301,
"step": 1240
},
{
"epoch": 0.9740973312401884,
"grad_norm": 0.2964264452457428,
"learning_rate": 2.6687598116169546e-07,
"loss": 1.055,
"step": 1241
},
{
"epoch": 0.9748822605965463,
"grad_norm": 0.43890246748924255,
"learning_rate": 2.590266875981162e-07,
"loss": 1.0003,
"step": 1242
},
{
"epoch": 0.9756671899529042,
"grad_norm": 0.3069480359554291,
"learning_rate": 2.5117739403453695e-07,
"loss": 1.0983,
"step": 1243
},
{
"epoch": 0.9764521193092621,
"grad_norm": 0.2868310213088989,
"learning_rate": 2.433281004709576e-07,
"loss": 1.0251,
"step": 1244
},
{
"epoch": 0.9772370486656201,
"grad_norm": 0.28800535202026367,
"learning_rate": 2.3547880690737838e-07,
"loss": 1.0251,
"step": 1245
},
{
"epoch": 0.978021978021978,
"grad_norm": 0.290998637676239,
"learning_rate": 2.2762951334379907e-07,
"loss": 0.996,
"step": 1246
},
{
"epoch": 0.978806907378336,
"grad_norm": 0.3080032467842102,
"learning_rate": 2.197802197802198e-07,
"loss": 1.1012,
"step": 1247
},
{
"epoch": 0.9795918367346939,
"grad_norm": 0.3035239577293396,
"learning_rate": 2.1193092621664052e-07,
"loss": 1.0589,
"step": 1248
},
{
"epoch": 0.9803767660910518,
"grad_norm": 0.3100905120372772,
"learning_rate": 2.0408163265306121e-07,
"loss": 1.0276,
"step": 1249
},
{
"epoch": 0.9811616954474097,
"grad_norm": 0.30332428216934204,
"learning_rate": 1.9623233908948196e-07,
"loss": 1.0549,
"step": 1250
},
{
"epoch": 0.9819466248037677,
"grad_norm": 0.2909112572669983,
"learning_rate": 1.8838304552590267e-07,
"loss": 1.0492,
"step": 1251
},
{
"epoch": 0.9827315541601256,
"grad_norm": 0.2939865291118622,
"learning_rate": 1.805337519623234e-07,
"loss": 1.0673,
"step": 1252
},
{
"epoch": 0.9835164835164835,
"grad_norm": 0.31474772095680237,
"learning_rate": 1.726844583987441e-07,
"loss": 1.0801,
"step": 1253
},
{
"epoch": 0.9843014128728415,
"grad_norm": 0.2975396513938904,
"learning_rate": 1.6483516483516484e-07,
"loss": 1.0645,
"step": 1254
},
{
"epoch": 0.9850863422291993,
"grad_norm": 0.2967347204685211,
"learning_rate": 1.5698587127158556e-07,
"loss": 0.9959,
"step": 1255
},
{
"epoch": 0.9858712715855573,
"grad_norm": 0.2978781461715698,
"learning_rate": 1.491365777080063e-07,
"loss": 1.0541,
"step": 1256
},
{
"epoch": 0.9866562009419152,
"grad_norm": 0.37754517793655396,
"learning_rate": 1.4128728414442702e-07,
"loss": 1.0134,
"step": 1257
},
{
"epoch": 0.9874411302982732,
"grad_norm": 0.28837668895721436,
"learning_rate": 1.3343799058084773e-07,
"loss": 1.0435,
"step": 1258
},
{
"epoch": 0.9882260596546311,
"grad_norm": 0.2922952473163605,
"learning_rate": 1.2558869701726847e-07,
"loss": 1.0211,
"step": 1259
},
{
"epoch": 0.989010989010989,
"grad_norm": 0.37126073241233826,
"learning_rate": 1.1773940345368919e-07,
"loss": 1.107,
"step": 1260
},
{
"epoch": 0.9897959183673469,
"grad_norm": 0.30882400274276733,
"learning_rate": 1.098901098901099e-07,
"loss": 1.0274,
"step": 1261
},
{
"epoch": 0.9905808477237049,
"grad_norm": 0.30320990085601807,
"learning_rate": 1.0204081632653061e-07,
"loss": 1.0478,
"step": 1262
},
{
"epoch": 0.9913657770800628,
"grad_norm": 0.305622398853302,
"learning_rate": 9.419152276295134e-08,
"loss": 1.0782,
"step": 1263
},
{
"epoch": 0.9921507064364207,
"grad_norm": 0.2983652353286743,
"learning_rate": 8.634222919937205e-08,
"loss": 1.0486,
"step": 1264
},
{
"epoch": 0.9929356357927787,
"grad_norm": 0.3014610707759857,
"learning_rate": 7.849293563579278e-08,
"loss": 1.0696,
"step": 1265
},
{
"epoch": 0.9937205651491365,
"grad_norm": 0.3930485248565674,
"learning_rate": 7.064364207221351e-08,
"loss": 1.0056,
"step": 1266
},
{
"epoch": 0.9945054945054945,
"grad_norm": 0.30500683188438416,
"learning_rate": 6.279434850863424e-08,
"loss": 1.0317,
"step": 1267
},
{
"epoch": 0.9952904238618524,
"grad_norm": 0.29284876585006714,
"learning_rate": 5.494505494505495e-08,
"loss": 1.0596,
"step": 1268
},
{
"epoch": 0.9960753532182104,
"grad_norm": 0.28736308217048645,
"learning_rate": 4.709576138147567e-08,
"loss": 0.9853,
"step": 1269
},
{
"epoch": 0.9968602825745683,
"grad_norm": 0.2951013445854187,
"learning_rate": 3.924646781789639e-08,
"loss": 1.0667,
"step": 1270
},
{
"epoch": 0.9976452119309263,
"grad_norm": 0.396849662065506,
"learning_rate": 3.139717425431712e-08,
"loss": 1.1164,
"step": 1271
},
{
"epoch": 0.9984301412872841,
"grad_norm": 0.3089428246021271,
"learning_rate": 2.3547880690737834e-08,
"loss": 1.0651,
"step": 1272
},
{
"epoch": 0.9992150706436421,
"grad_norm": 0.295634388923645,
"learning_rate": 1.569858712715856e-08,
"loss": 1.027,
"step": 1273
},
{
"epoch": 1.0,
"grad_norm": 0.2972462475299835,
"learning_rate": 7.84929356357928e-09,
"loss": 1.0419,
"step": 1274
}
],
"logging_steps": 1.0,
"max_steps": 1274,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 0,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.964361379037053e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}